diff --git a/README.md b/README.md
index 6cc7a7d11..469065610 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@
**General Information** | |
--- | ---
-**Repository** | [![Project Status: Active](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![Conda Recipe](https://img.shields.io/badge/recipe-atom--ml-green.svg)](https://anaconda.org/conda-forge/atom-ml) [![License: MIT](https://img.shields.io/github/license/tvdboom/ATOM)](https://opensource.org/licenses/MIT) [![Downloads](https://pepy.tech/badge/atom-ml)](https://pepy.tech/project/atom-ml)
+**Repository** | [![Project Status: Active](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![Conda Recipe](https://img.shields.io/badge/recipe-atom--ml-green.svg)](https://anaconda.org/conda-forge/atom-ml) [![License: MIT](https://img.shields.io/github/license/tvdboom/ATOM)](https://opensource.org/licenses/MIT) [![Downloads](https://static.pepy.tech/badge/atom-ml)](https://pepy.tech/project/atom-ml)
**Release** | [![pdm-managed](https://img.shields.io/badge/pdm-managed-blueviolet)](https://pdm.fming.dev) [![PyPI version](https://img.shields.io/pypi/v/atom-ml)](https://pypi.org/project/atom-ml/) [![Conda Version](https://img.shields.io/conda/vn/conda-forge/atom-ml.svg)](https://anaconda.org/conda-forge/atom-ml) [![DOI](https://zenodo.org/badge/195069958.svg)](https://zenodo.org/badge/latestdoi/195069958)
**Compatibility** | [![Python 3.8\|3.9\|3.10\|3.11](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue?logo=python)](https://www.python.org) [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/atom-ml.svg)](https://anaconda.org/conda-forge/atom-ml)
**Build status** | [![Build Status](https://github.com/tvdboom/ATOM/workflows/ATOM/badge.svg)](https://github.com/tvdboom/ATOM/actions) [![Azure Pipelines](https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/atom-ml-feedstock?branchName=master)](https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=10822&branchName=master) [![codecov](https://codecov.io/gh/tvdboom/ATOM/branch/master/graph/badge.svg)](https://codecov.io/gh/tvdboom/ATOM)
diff --git a/atom/api.py b/atom/api.py
index 8eea06ddf..2cbba7db3 100644
--- a/atom/api.py
+++ b/atom/api.py
@@ -18,8 +18,8 @@
from atom.atom import ATOM
from atom.basetransformer import BaseTransformer
from atom.utils.types import (
- BACKEND, BOOL, ENGINE, GOAL, INDEX_SELECTOR, INT, PREDICTOR, SCALAR,
- TARGET,
+ BACKEND, BOOL, ENGINE, INDEX_SELECTOR, INT, PREDICTOR, SCALAR,
+ TARGET, WARNINGS,
)
@@ -160,7 +160,6 @@ class ATOMClassifier(BaseTransformer, ATOM):
y: int, str, dict, sequence or dataframe, default=-1
Target column corresponding to X.
- - If None: y is ignored.
- If int: Position of the target column in X.
- If str: Name of the target column in X.
- If sequence: Target array with shape=(n_samples,) or
@@ -336,7 +335,7 @@ def __init__(
engine: ENGINE = {"data": "numpy", "estimator": "sklearn"},
backend: BACKEND = "loky",
verbose: Literal[0, 1, 2] = 0,
- warnings: BOOL | str = False,
+ warnings: BOOL | WARNINGS = False,
logger: str | Logger | None = None,
experiment: str | None = None,
random_state: INT | None = None,
@@ -353,7 +352,7 @@ def __init__(
random_state=random_state,
)
- self.goal: GOAL = "class"
+ self.goal = "class"
ATOM.__init__(
self,
arrays=arrays,
@@ -555,7 +554,7 @@ def __init__(
engine: ENGINE = {"data": "numpy", "estimator": "sklearn"},
backend: BACKEND = "loky",
verbose: Literal[0, 1, 2] = 0,
- warnings: BOOL | str = False,
+ warnings: BOOL | WARNINGS = False,
logger: str | Logger | None = None,
experiment: str | None = None,
random_state: INT | None = None,
@@ -572,7 +571,7 @@ def __init__(
random_state=random_state,
)
- self.goal: GOAL = "fc"
+ self.goal = "fc"
ATOM.__init__(
self,
arrays=arrays,
@@ -790,7 +789,7 @@ def __init__(
engine: ENGINE = {"data": "numpy", "estimator": "sklearn"},
backend: BACKEND = "loky",
verbose: Literal[0, 1, 2] = 0,
- warnings: BOOL | str = False,
+ warnings: BOOL | WARNINGS = False,
logger: str | Logger | None = None,
experiment: str | None = None,
random_state: INT | None = None,
@@ -807,7 +806,7 @@ def __init__(
random_state=random_state,
)
- self.goal: GOAL = "reg"
+ self.goal = "reg"
ATOM.__init__(
self,
arrays=arrays,
diff --git a/atom/atom.py b/atom/atom.py
index 817f51fb8..73127f3e9 100644
--- a/atom/atom.py
+++ b/atom/atom.py
@@ -36,9 +36,7 @@
)
from atom.models import MODELS
from atom.nlp import TextCleaner, TextNormalizer, Tokenizer, Vectorizer
-from atom.plots import (
- DataPlot, FeatureSelectorPlot, HTPlot, PredictionPlot, ShapPlot,
-)
+from atom.plots import ATOMPlot
from atom.training import (
DirectClassifier, DirectForecaster, DirectRegressor,
SuccessiveHalvingClassifier, SuccessiveHalvingForecaster,
@@ -47,9 +45,10 @@
)
from atom.utils.constants import MISSING_VALUES, __version__
from atom.utils.types import (
- BOOL, DATAFRAME, DATASET, FEATURES, INDEX, INDEX_SELECTOR, INT,
- METRIC_SELECTOR, PANDAS, PREDICTOR, RUNNER, SCALAR, SEQUENCE, SERIES,
- SLICE, TARGET, TRANSFORMER, TS_INDEX_TYPES,
+ BOOL, DATAFRAME, DATASET, DISCRETIZER_STRATS, ESTIMATOR, FEATURES, INDEX,
+ INDEX_SELECTOR, INT, METRIC_SELECTOR, PANDAS, PREDICTOR, PRUNER_STRATS,
+ RUNNER, SCALAR, SCALER_STRATS, SEQUENCE, SERIES, SLICE, STRAT_NUM, TARGET,
+ TRANSFORMER, TS_INDEX_TYPES,
)
from atom.utils.utils import (
ClassMap, DataConfig, check_dependency, check_is_fitted, check_scaling,
@@ -60,7 +59,7 @@
@typechecked
-class ATOM(BaseRunner, FeatureSelectorPlot, DataPlot, HTPlot, PredictionPlot, ShapPlot):
+class ATOM(BaseRunner, ATOMPlot):
"""ATOM base class.
The ATOM class is a convenient wrapper for all data cleaning,
@@ -160,7 +159,7 @@ def __repr__(self) -> str:
return out
- def __iter__(self) -> TRANSFORMER:
+ def __iter__(self) -> TRANSFORMER | None:
yield from self.pipeline.values
# Utility properties =========================================== >>
@@ -545,7 +544,7 @@ def inverse_transform(
y: TARGET | None = None,
*,
verbose: INT | None = None,
- ) -> PANDAS | tuple[DATAFRAME, SERIES]:
+ ) -> PANDAS | tuple[DATAFRAME, PANDAS]:
"""Inversely transform new data through the pipeline.
Transformers that are only applied on the training set are
@@ -898,7 +897,7 @@ def get_data(new_t: str) -> SERIES:
get_data(r[0]) for r in t if r[1] <= column.min() and r[2] >= column.max()
)
- if self.engine["data"] == "pyarrow":
+ if self.engine.get("data") == "pyarrow":
self.branch.dataset = self.branch.dataset.astype(
{name: to_pyarrow(col) for name, col in self.branch._data.items()}
)
@@ -986,7 +985,7 @@ def transform(
y: TARGET | None = None,
*,
verbose: INT | None = None,
- ) -> PANDAS | tuple[DATAFRAME, SERIES]:
+ ) -> PANDAS | tuple[DATAFRAME, PANDAS]:
"""Transform new data through the pipeline.
Transformers that are only applied on the training set are
@@ -1068,7 +1067,7 @@ def _add_transformer(
self,
transformer: TRANSFORMER,
columns: SLICE | None = None,
- train_only: bool = False,
+ train_only: BOOL = False,
**fit_params,
):
"""Add a transformer to the pipeline.
@@ -1106,9 +1105,6 @@ def _add_transformer(
"new branch to continue the pipeline."
)
- if not hasattr(transformer, "transform"):
- raise AttributeError("Added transformers should have a transform method!")
-
# Add BaseTransformer params to the estimator if left to default
transformer = self._inherit(transformer)
@@ -1160,7 +1156,7 @@ def add(
transformer: TRANSFORMER,
*,
columns: SLICE | None = None,
- train_only: bool = False,
+ train_only: BOOL = False,
**fit_params,
):
"""Add a transformer to the pipeline.
@@ -1249,9 +1245,8 @@ def apply(
):
"""Apply a function to the dataset.
- The function should have signature `func(dataset, **kw_args) ->
- dataset`. This method is useful for stateless transformations
- such as taking the log, doing custom scaling, etc...
+ This method is useful for stateless transformations such as
+ taking the log, doing custom scaling, etc...
!!! note
This approach is preferred over changing the dataset directly
@@ -1265,7 +1260,8 @@ def apply(
Parameters
----------
func: callable
- Function to apply.
+ Function to apply with signature `func(dataset, **kw_args) ->
+ dataset`.
inverse_func: callable or None, default=None
Inverse function of `func`. If None, the inverse_transform
@@ -1336,13 +1332,13 @@ def balance(self, strategy: str = "adasyn", **kwargs):
def clean(
self,
*,
- convert_dtypes: bool = True,
+ convert_dtypes: BOOL = True,
drop_dtypes: str | SEQUENCE | None = None,
drop_chars: str | None = None,
- strip_categorical: bool = True,
- drop_duplicates: bool = False,
- drop_missing_target: bool = True,
- encode_target: bool = True,
+ strip_categorical: BOOL = True,
+ drop_duplicates: BOOL = False,
+ drop_missing_target: BOOL = True,
+ encode_target: BOOL = True,
**kwargs,
):
"""Applies standard data cleaning steps on the dataset.
@@ -1382,7 +1378,7 @@ def clean(
@composed(crash, method_to_log)
def discretize(
self,
- strategy: str = "quantile",
+ strategy: DISCRETIZER_STRATS = "quantile",
*,
bins: INT | SEQUENCE | dict = 5,
labels: SEQUENCE | dict | None = None,
@@ -1467,7 +1463,7 @@ def encode(
@composed(crash, method_to_log)
def impute(
self,
- strat_num: SCALAR | Literal["drop", "mean", "knn", "most_frequent"] = "drop",
+ strat_num: STRAT_NUM = "drop",
strat_cat: Literal["drop", "most_frequent"] | str = "drop",
*,
max_nan_rows: SCALAR | None = None,
@@ -1539,11 +1535,11 @@ def normalize(
@composed(crash, method_to_log)
def prune(
self,
- strategy: str | SEQUENCE = "zscore",
+ strategy: PRUNER_STRATS | SEQUENCE = "zscore",
*,
method: SCALAR | Literal["drop", "minmax"] = "drop",
max_sigma: SCALAR = 3,
- include_target: bool = False,
+ include_target: BOOL = False,
**kwargs,
):
"""Prune outliers from the training set.
@@ -1581,7 +1577,12 @@ def prune(
setattr(self.branch, strat.lower(), getattr(pruner, strat.lower()))
@composed(crash, method_to_log)
- def scale(self, strategy: str = "standard", include_binary: bool = False, **kwargs):
+ def scale(
+ self,
+ strategy: SCALER_STRATS = "standard",
+ include_binary: BOOL = False,
+ **kwargs,
+ ):
"""Scale the data.
Apply one of sklearn's scalers. Categorical columns are ignored.
@@ -1611,19 +1612,19 @@ def scale(self, strategy: str = "standard", include_binary: bool = False, **kwar
def textclean(
self,
*,
- decode: bool = True,
- lower_case: bool = True,
- drop_email: bool = True,
+ decode: BOOL = True,
+ lower_case: BOOL = True,
+ drop_email: BOOL = True,
regex_email: str | None = None,
- drop_url: bool = True,
+ drop_url: BOOL = True,
regex_url: str | None = None,
- drop_html: bool = True,
+ drop_html: BOOL = True,
regex_html: str | None = None,
- drop_emoji: bool = True,
+ drop_emoji: BOOL = True,
regex_emoji: str | None = None,
- drop_number: bool = True,
+ drop_number: BOOL = True,
regex_number: str | None = None,
- drop_punctuation: bool = True,
+ drop_punctuation: BOOL = True,
**kwargs,
):
"""Applies standard text cleaning to the corpus.
@@ -1664,10 +1665,10 @@ def textclean(
def textnormalize(
self,
*,
- stopwords: bool | str = True,
+ stopwords: BOOL | str = True,
custom_stopwords: SEQUENCE | None = None,
- stem: bool | str = False,
- lemmatize: bool = True,
+ stem: BOOL | str = False,
+ lemmatize: BOOL = True,
**kwargs,
):
"""Normalize the corpus.
@@ -1727,7 +1728,13 @@ def tokenize(
self.branch.quadgrams = tokenizer.quadgrams
@composed(crash, method_to_log)
- def vectorize(self, strategy: str = "bow", *, return_sparse: bool = True, **kwargs):
+ def vectorize(
+ self,
+ strategy: Literal["bow", "tfidf", "hashing"] = "bow",
+ *,
+ return_sparse: BOOL = True,
+ **kwargs,
+ ):
"""Vectorize the corpus.
Transform the corpus into meaningful vectors of numbers. The
@@ -1766,7 +1773,7 @@ def feature_extraction(
fmt: str | SEQUENCE | None = None,
*,
encoding_type: str = "ordinal",
- drop_columns: bool = True,
+ drop_columns: BOOL = True,
**kwargs,
):
"""Extract features from datetime columns.
@@ -1831,7 +1838,7 @@ def feature_grouping(
group: dict[str, str | SEQUENCE],
*,
operators: str | SEQUENCE | None = None,
- drop_columns: bool = True,
+ drop_columns: BOOL = True,
**kwargs,
):
"""Extract statistics from similar features.
@@ -1862,7 +1869,7 @@ def feature_selection(
self,
strategy: str | None = None,
*,
- solver: str | Callable | None = None,
+ solver: str | ESTIMATOR | None = None,
n_features: SCALAR | None = None,
min_repeated: SCALAR | None = 2,
max_repeated: SCALAR | None = 1.0,
@@ -2005,7 +2012,7 @@ def run(
n_trials: INT | dict | SEQUENCE = 0,
ht_params: dict | None = None,
n_bootstrap: INT | SEQUENCE = 0,
- parallel: bool = False,
+ parallel: BOOL = False,
errors: Literal["raise", "skip", "keep"] = "skip",
**kwargs,
):
@@ -2061,7 +2068,7 @@ def successive_halving(
n_trials: INT | dict | SEQUENCE = 0,
ht_params: dict | None = None,
n_bootstrap: INT | dict | SEQUENCE = 0,
- parallel: bool = False,
+ parallel: BOOL = False,
errors: Literal["raise", "skip", "keep"] = "skip",
**kwargs,
):
@@ -2124,7 +2131,7 @@ def train_sizing(
n_trials: INT | dict | SEQUENCE = 0,
ht_params: dict | None = None,
n_bootstrap: INT | dict | SEQUENCE = 0,
- parallel: bool = False,
+ parallel: BOOL = False,
errors: Literal["raise", "skip", "keep"] = "skip",
**kwargs,
):
diff --git a/atom/basemodel.py b/atom/basemodel.py
index 1060c7421..723c128da 100644
--- a/atom/basemodel.py
+++ b/atom/basemodel.py
@@ -17,7 +17,7 @@
from logging import Logger
from typing import Any, Callable, Literal
from unittest.mock import patch
-
+from typeguard import TypeCheckError
import dill as pickle
import mlflow
import numpy as np
@@ -56,12 +56,12 @@
from atom.basetransformer import BaseTransformer
from atom.data_cleaning import Scaler
from atom.pipeline import Pipeline
-from atom.plots import HTPlot, PredictionPlot, ShapPlot
+from atom.plots import RunnerPlot
from atom.utils.constants import DF_ATTRS
from atom.utils.types import (
BOOL, BRANCH, DATAFRAME, DATAFRAME_TYPES, ENGINE, FEATURES, FLOAT,
- FLOAT_TYPES, GOAL, INDEX, INT, INT_TYPES, METRIC_SELECTOR, PANDAS,
- PREDICTOR, SCALAR, SCORER, SEQUENCE, SERIES, SLICE, TARGET,
+ FLOAT_TYPES, INDEX, INT, INT_TYPES, METRIC_SELECTOR, PANDAS,
+ PREDICTOR, SCALAR, SCORER, SEQUENCE, SERIES, SLICE, TARGET, WARNINGS,
)
from atom.utils.utils import (
ClassMap, CustomDict, DataConfig, PlotCallback, ShapExplanation,
@@ -75,7 +75,7 @@
@typechecked
-class BaseModel(BaseTransformer, BaseTracker, HTPlot, PredictionPlot, ShapPlot):
+class BaseModel(BaseTransformer, BaseTracker, RunnerPlot):
"""Base class for all models.
Parameters
@@ -174,7 +174,7 @@ class BaseModel(BaseTransformer, BaseTracker, HTPlot, PredictionPlot, ShapPlot):
def __init__(
self,
name: str | None = None,
- goal: GOAL = "class",
+ goal: Literal["class", "reg", "fc"] = "class",
config: DataConfig | None = None,
og: BRANCH | None = None,
branch: BRANCH | None = None,
@@ -184,7 +184,7 @@ def __init__(
engine: ENGINE = {"data": "numpy", "estimator": "sklearn"},
backend: str = "loky",
verbose: Literal[0, 1, 2] = 0,
- warnings: BOOL | str = False,
+ warnings: BOOL | WARNINGS = False,
logger: str | Logger | None = None,
experiment: str | None = None,
random_state: INT | None = None,
@@ -276,16 +276,12 @@ def _fullname(self) -> str:
"""Return the model's class name."""
return self.__class__.__name__
- @property
- def _gpu(self) -> BOOL:
- """Return whether the model uses a GPU implementation."""
- return "gpu" in self.device.lower()
-
@property
def _est_class(self) -> PREDICTOR:
"""Return the estimator's class (not instance)."""
try:
- module = import_module(f"{self.engine['estimator']}.{self._module}")
+ engine = self.engine.get("estimator", "sklearn")
+ module = import_module(f"{engine}.{self._module}")
cls = self._estimators.get(self.goal, self._estimators.get("reg"))
except (ModuleNotFoundError, AttributeError):
if "sklearn" in self.supports_engines:
@@ -442,9 +438,9 @@ def _get_est(self, **params) -> PREDICTOR:
def _fit_estimator(
self,
estimator: PREDICTOR,
- data: tuple[DATAFRAME, SERIES],
+ data: tuple[DATAFRAME, PANDAS],
est_params_fit: dict,
- validation: tuple[DATAFRAME, SERIES] | None = None,
+ validation: tuple[DATAFRAME, PANDAS] | None = None,
trial: Trial | None = None,
) -> PREDICTOR:
"""Fit the estimator and perform in-training validation.
@@ -581,7 +577,7 @@ def _final_output(self) -> str:
if (1.2 if score_train < 0 else 0.8) * score_train > score_test:
out += " ~"
- except AttributeError: # Fails when model failed but errors="keep"
+ except TypeCheckError: # Fails when model failed but errors="keep"
out = "FAIL"
return out
@@ -692,7 +688,7 @@ def _score_from_pred(
y_true: PANDAS,
y_pred: PANDAS,
**kwargs,
- ) -> FLOAT:
+ ) -> SCALAR:
"""Calculate the metric score from predicted values.
Since sklearn metrics don't support multiclass-multioutput
@@ -715,7 +711,7 @@ def _score_from_pred(
Returns
-------
- float
+ int or float
Calculated score.
"""
@@ -740,7 +736,7 @@ def _get_score(
dataset: str,
threshold: tuple[FLOAT] | None = None,
sample_weight: tuple | None = None,
- ) -> FLOAT:
+ ) -> SCALAR:
"""Calculate a metric score using the prediction attributes.
The method results are cached to avoid recalculation of the
@@ -771,7 +767,7 @@ def _get_score(
Returns
-------
- float
+ int or float
Metric score on the selected data set.
"""
@@ -886,7 +882,7 @@ def fit_model(
y_val = self.og.y_train.iloc[val_idx]
# Transform subsets if there is a pipeline
- if len(pl := self.export_pipeline(verbose=0)[:-1]) > 0:
+ if len(pl := export_pipeline(self.pipeline, verbose=0)) > 0:
X_subtrain, y_subtrain = pl.fit_transform(X_subtrain, y_subtrain)
X_val, y_val = pl.transform(X_val, y_val)
@@ -1401,17 +1397,17 @@ def evals(self) -> CustomDict:
return self._evals
@property
- def score_train(self) -> FLOAT | list[FLOAT]:
+ def score_train(self) -> SCALAR | list[SCALAR]:
"""Metric score on the training set."""
return flt([self._get_score(m, "train") for m in self._metric])
@property
- def score_test(self) -> FLOAT | list[FLOAT]:
+ def score_test(self) -> SCALAR | list[SCALAR]:
"""Metric score on the test set."""
return flt([self._get_score(m, "test") for m in self._metric])
@property
- def score_holdout(self) -> FLOAT | list[FLOAT]:
+ def score_holdout(self) -> SCALAR | list[SCALAR]:
"""Metric score on the holdout set."""
return flt([self._get_score(m, "holdout") for m in self._metric])
@@ -1433,7 +1429,7 @@ def bootstrap(self) -> pd.DataFrame | None:
return self._bootstrap
@property
- def score_bootstrap(self) -> FLOAT | list[FLOAT] | None:
+ def score_bootstrap(self) -> SCALAR | list[SCALAR] | None:
"""Mean metric score on the bootstrapped samples."""
if self.bootstrap is not None:
return flt(self.bootstrap.mean().tolist())
@@ -2141,7 +2137,7 @@ def inverse_transform(
y: TARGET | None = None,
*,
verbose: INT | None = None,
- ) -> PANDAS | tuple[DATAFRAME, SERIES]:
+ ) -> PANDAS | tuple[DATAFRAME, PANDAS]:
"""Inversely transform new data through the pipeline.
Transformers that are only applied on the training set are
@@ -2200,7 +2196,7 @@ def register(
self,
name: str | None = None,
stage: str = "None",
- archive_existing_versions: bool = False,
+ archive_existing_versions: BOOL = False,
):
"""Register the model in [mlflow's model registry][registry].
@@ -2340,7 +2336,7 @@ def transform(
y: TARGET | None = None,
*,
verbose: INT | None = None,
- ) -> PANDAS | tuple[DATAFRAME, SERIES]:
+ ) -> PANDAS | tuple[DATAFRAME, PANDAS]:
"""Transform new data through the pipeline.
Transformers that are only applied on the training set are
@@ -3490,7 +3486,7 @@ def predict_proba(
self,
fh: int | SEQUENCE | ForecastingHorizon,
X: FEATURES | None = None,
- marginal: bool = True,
+ marginal: BOOL = True,
verbose: INT | None = None,
) -> Normal:
"""Get probabilistic forecasts on new data or existing rows.
@@ -3624,7 +3620,7 @@ def predict_var(
self,
fh: int | SEQUENCE | ForecastingHorizon,
X: FEATURES | None = None,
- cov: bool = False,
+ cov: BOOL = False,
verbose: INT | None = None,
) -> DATAFRAME:
"""Get probabilistic forecasts on new data or existing rows.
diff --git a/atom/baserunner.py b/atom/baserunner.py
index 758553df0..7d66f424e 100644
--- a/atom/baserunner.py
+++ b/atom/baserunner.py
@@ -97,13 +97,10 @@ def __len__(self) -> int:
return len(self.dataset)
def __contains__(self, item: str) -> BOOL:
- if self.dataset is None:
- return False
- else:
- return item in self.dataset
+ return item in self.dataset
def __getitem__(self, item: INT | str | list) -> Any:
- if self.dataset is None:
+ if self.dataset.empty:
raise RuntimeError(
"This instance has no dataset annexed to it. "
"Use the run method before calling __getitem__."
@@ -122,18 +119,13 @@ def __getitem__(self, item: INT | str | list) -> Any:
f"{self.__class__.__name__} object has no "
f"branch, model or column called {item}."
)
- elif isinstance(item, list):
- return self.dataset[item] # Get subset of dataset
else:
- raise TypeError(
- f"{self.__class__.__name__} is only "
- "subscriptable with types int, str or list."
- )
+ return self.dataset[item] # Get subset of dataset
# Utility properties =========================================== >>
@property
- def og(self) -> Branch:
+ def og(self) -> BRANCH:
"""Branch containing the original dataset.
This branch contains the data prior to any transformations.
@@ -144,7 +136,7 @@ def og(self) -> Branch:
return self._og or self.branch
@property
- def branch(self) -> Branch:
+ def branch(self) -> BRANCH:
"""Current active branch.
Use the property's `@setter` to change the branch or to create
diff --git a/atom/basetracker.py b/atom/basetracker.py
index 6a919a637..8cb73fe9c 100644
--- a/atom/basetracker.py
+++ b/atom/basetracker.py
@@ -7,6 +7,8 @@
"""
+from __future__ import annotations
+
from dataclasses import dataclass
from typeguard import typechecked
diff --git a/atom/basetrainer.py b/atom/basetrainer.py
index 6b26fc6f9..0c94f89d3 100644
--- a/atom/basetrainer.py
+++ b/atom/basetrainer.py
@@ -13,7 +13,7 @@
import traceback
from datetime import datetime as dt
from typing import Any
-
+from typeguard import TypeCheckError
import joblib
import mlflow
import numpy as np
@@ -28,7 +28,7 @@
from atom.branch import Branch
from atom.data_cleaning import BaseTransformer
from atom.models import MODELS, CustomModel
-from atom.plots import HTPlot, PredictionPlot, ShapPlot
+from atom.plots import RunnerPlot
from atom.utils.types import MODEL, SEQUENCE_TYPES
from atom.utils.utils import (
ClassMap, DataConfig, check_dependency, get_best_score, get_custom_scorer,
@@ -37,7 +37,7 @@
@typechecked
-class BaseTrainer(BaseTransformer, BaseRunner, HTPlot, PredictionPlot, ShapPlot):
+class BaseTrainer(BaseTransformer, BaseRunner, RunnerPlot):
"""Base class for trainers.
Implements methods to check the validity of the parameters,
@@ -432,7 +432,7 @@ def execute_model(m: MODEL) -> MODEL | None:
try:
scores.append(get_best_score(model))
- except AttributeError: # Fails when model failed but errors="keep"
+ except TypeCheckError: # Fails when model failed but errors="keep"
scores.append(-np.inf)
maxlen = max(maxlen, len(names[-1]))
diff --git a/atom/basetransformer.py b/atom/basetransformer.py
index edac1266a..c94b0df4a 100644
--- a/atom/basetransformer.py
+++ b/atom/basetransformer.py
@@ -18,7 +18,7 @@
from importlib.util import find_spec
from logging import DEBUG, FileHandler, Formatter, Logger, getLogger
from multiprocessing import cpu_count
-from typing import Any, Callable
+from typing import Any, Callable, Literal
import dagshub
import dill as pickle
@@ -33,8 +33,8 @@
from typeguard import typechecked
from atom.utils.types import (
- BOOL, DATAFRAME, DATAFRAME_TYPES, FEATURES, INDEX, INT, INT_TYPES, PANDAS,
- PREDICTOR, SCALAR, SEQUENCE, SEQUENCE_TYPES, TARGET,
+ BACKEND, BOOL, DATAFRAME, DATAFRAME_TYPES, ENGINE, ESTIMATOR, FEATURES,
+ INT, INT_TYPES, PANDAS, SCALAR, SEQUENCE, SEQUENCE_TYPES, TARGET, WARNINGS,
)
from atom.utils.utils import (
bk, composed, crash, get_cols, lst, merge, method_to_log, n_cols, pd, sign,
@@ -101,7 +101,8 @@ def n_jobs(self, value: INT):
# Final check for negative input
if value < 1:
raise ValueError(
- f"Invalid value for the n_jobs parameter, got {value}.", 1
+ "Invalid value for the n_jobs parameter, "
+                f"got {value}. Value should be >=1.", 1
)
self._n_jobs = value
@@ -118,92 +119,55 @@ def device(self, value: str):
os.environ["CUDA_VISIBLE_DEVICES"] = str(self._device_id)
@property
- def engine(self) -> dict:
+ def engine(self) -> ENGINE:
"""Execution engine for estimators."""
return self._engine
@engine.setter
- def engine(self, value: dict | None):
- if not value:
- value = {"data": "numpy", "estimator": "sklearn"}
- elif "data" not in value and "estimator" not in value:
- raise ValueError(
- f"Invalid value for the engine parameter, got {value}. "
- "The value should be a dict with keys 'data' and/or 'estimator'."
+ def engine(self, value: ENGINE):
+ if value.get("data") == "modin" and not ray.is_initialized():
+ ray.init(
+ runtime_env={"env_vars": {"__MODIN_AUTOIMPORT_PANDAS__": "1"}},
+ log_to_driver=False,
)
- if data := value.get("data"):
- if data.lower() == "modin":
- if not ray.is_initialized():
- ray.init(
- runtime_env={"env_vars": {"__MODIN_AUTOIMPORT_PANDAS__": "1"}},
- log_to_driver=False,
- )
- elif data.lower() not in ("numpy", "pyarrow"):
- raise ValueError(
- "Invalid value for the data key of the engine parameter, "
- f"got {data}. Choose from: numpy, pyarrow, modin."
- )
- else:
- value["data"] = "numpy"
-
# Update env variable to use for PandasModin in utils.py
- os.environ["ATOM_DATA_ENGINE"] = value["data"].lower()
-
- if models := value.get("estimator"):
- device = self.device.lower()
-
- if models.lower() == "sklearnex":
- if not find_spec("sklearnex"):
- raise ModuleNotFoundError(
- "Failed to import scikit-learn-intelex. The library is "
- "not installed. Note that the library only supports CPUs "
- "with a x86 architecture."
- )
- else:
- import sklearnex
- sklearnex.set_config(device if "gpu" in device else "auto")
- elif models.lower() == "cuml":
- if not find_spec("cuml"):
- raise ModuleNotFoundError(
- "Failed to import cuml. Package is not installed. Refer "
- "to: https://rapids.ai/start.html#install."
- )
- else:
- from cuml.common.device_selection import (
- set_global_device_type,
- )
- set_global_device_type("gpu" if "gpu" in device else "cpu")
-
- # See https://github.com/rapidsai/cuml/issues/5564
- from cuml.internals.memory_utils import (
- set_global_output_type,
- )
- set_global_output_type("numpy")
-
- elif models.lower() != "sklearn":
- raise ValueError(
- "Invalid value for the models key of the engine parameter, "
- f"got {models}. Choose from: sklearn, sklearnex, cuml."
+ os.environ["ATOM_DATA_ENGINE"] = value.get("data", "numpy")
+
+ if value.get("estimator") == "sklearnex":
+ if not find_spec("sklearnex"):
+ raise ModuleNotFoundError(
+ "Failed to import scikit-learn-intelex. The library is "
+ "not installed. Note that the library only supports CPUs "
+ "with a x86 architecture."
)
- else:
- value["estimator"] = "sklearn"
+ else:
+ import sklearnex
+ sklearnex.set_config(self.device.lower() if self._gpu else "auto")
+ elif value.get("estimator") == "cuml":
+ if not find_spec("cuml"):
+ raise ModuleNotFoundError(
+ "Failed to import cuml. Package is not installed. Refer "
+ "to: https://rapids.ai/start.html#install."
+ )
+ else:
+ from cuml.common.device_selection import set_global_device_type
+ set_global_device_type("gpu" if self._gpu else "cpu")
+
+ # See https://github.com/rapidsai/cuml/issues/5564
+ from cuml.internals.memory_utils import set_global_output_type
+ set_global_output_type("numpy")
self._engine = value
@property
- def backend(self) -> str:
+ def backend(self) -> BACKEND:
"""Parallelization backend."""
return self._backend
@backend.setter
- def backend(self, value: str):
- if value.lower() not in (opts := ("loky", "multiprocessing", "threading", "ray")):
- raise ValueError(
- f"Invalid value for the backend parameter, got "
- f"{value}. Choose from: {', '.join(opts)}."
- )
- elif value.lower() == "ray":
+ def backend(self, value: BACKEND):
+ if value == "ray":
register_ray() # Register ray as joblib backend
if not ray.is_initialized():
ray.init(log_to_driver=False)
@@ -211,35 +175,24 @@ def backend(self, value: str):
self._backend = value
@property
- def verbose(self) -> INT:
+ def verbose(self) -> Literal[0, 1, 2]:
"""Verbosity level of the output."""
return self._verbose
@verbose.setter
- def verbose(self, value: INT):
- if value < 0 or value > 2:
- raise ValueError(
- "Invalid value for the verbose parameter. Value"
- f" should be between 0 and 2, got {value}."
- )
+ def verbose(self, value: Literal[0, 1, 2]):
self._verbose = value
@property
- def warnings(self) -> str:
+ def warnings(self) -> WARNINGS:
"""Whether to show or suppress encountered warnings."""
return self._warnings
@warnings.setter
- def warnings(self, value: BOOL | str):
+ def warnings(self, value: BOOL | WARNINGS):
if isinstance(value, BOOL):
self._warnings = "default" if value else "ignore"
else:
- options = ("default", "error", "ignore", "always", "module", "once")
- if value not in options:
- raise ValueError(
- "Invalid value for the warnings parameter, got "
- f"{value}. Choose from: {', '.join(options)}."
- )
self._warnings = value
warnings.filterwarnings(self._warnings) # Change the filter in this process
@@ -336,7 +289,7 @@ def experiment(self, value: str | None):
mlflow.set_experiment(value)
@property
- def random_state(self) -> INT:
+ def random_state(self) -> INT | None:
"""Seed used by the random number generator."""
return self._random_state
@@ -351,6 +304,11 @@ def random_state(self, value: INT | None):
np.random.seed(value)
self._random_state = value
+ @property
+ def _gpu(self) -> BOOL:
+ """Return whether the instance uses a GPU implementation."""
+ return "gpu" in self.device.lower()
+
@property
def _device_id(self) -> int:
"""Which GPU device to use."""
@@ -392,7 +350,7 @@ def _inherit(self, obj: Any) -> Any:
return obj
- def _get_est_class(self, name: str, module: str) -> PREDICTOR:
+ def _get_est_class(self, name: str, module: str) -> ESTIMATOR:
"""Import a class from a module.
When the import fails, for example if atom uses sklearnex and
@@ -408,12 +366,13 @@ def _get_est_class(self, name: str, module: str) -> PREDICTOR:
Returns
-------
- Predictor
+ Estimator
Class of the estimator.
"""
try:
- return getattr(import_module(f"{self.engine['estimator']}.{module}"), name)
+ engine = self.engine.get("estimator", "sklearn")
+ return getattr(import_module(f"{engine}.{module}"), name)
except (ModuleNotFoundError, AttributeError):
return getattr(import_module(f"sklearn.{module}"), name)
@@ -925,7 +884,7 @@ def _has_data_sets(
if self.goal == "fc" and not isinstance(y, (INT, str)):
# arrays=() and y=y for forecasting
sets = _no_data_sets(*self._prepare_input(y=y))
- elif self.branch._data is None:
+ elif self.branch._data.empty:
raise ValueError(
"The data arrays are empty! Provide the data to run the pipeline "
"successfully. See the documentation for the allowed formats."
@@ -1042,7 +1001,7 @@ def log(self, msg: SCALAR | str, level: INT = 0, severity: str = "info"):
getattr(self.logger, severity)(str(text))
@composed(crash, method_to_log)
- def save(self, filename: str = "auto", *, save_data: bool = True):
+ def save(self, filename: str = "auto", *, save_data: BOOL = True):
"""Save the instance to a pickle file.
Parameters
diff --git a/atom/branch.py b/atom/branch.py
index 423d0ce28..9abf44789 100644
--- a/atom/branch.py
+++ b/atom/branch.py
@@ -43,8 +43,8 @@ class Branch:
name: str
Name of the branch.
- data: dataframe or None, default=None
- Complete dataset.
+ data: dataframe, default=pd.DataFrame()
+ Complete dataset. Defaults to an empty frame if not provided.
index: list or None, default=None
A list containing the number of target columns, the indices of
@@ -61,7 +61,7 @@ class Branch:
def __init__(
self,
name: str,
- data: DATAFRAME | None = None,
+ data: DATAFRAME = pd.DataFrame(),
index: list[INT, INDEX, INDEX] | None = None,
holdout: DATAFRAME | None = None,
parent: BRANCH | None = None,
@@ -69,7 +69,7 @@ def __init__(
self._data = data
self._idx = index
self._holdout = holdout
- self._pipeline = pd.Series(data=[], dtype="object")
+ self._pipeline = pd.Series(dtype="object")
self._mapping = CustomDict()
# If a parent branch is provided, transfer its attrs to this one
@@ -87,7 +87,7 @@ def __repr__(self) -> str:
return f"Branch({self.name})"
def __bool__(self):
- return self._data is not None
+ return not self._data.empty
@property
def name(self) -> str:
@@ -172,7 +172,7 @@ def counter(name: str, dim: str) -> str:
value = to_pandas(
data=value,
index=side.index if side_name else None,
- name=getattr(under, "name", None) if under_name else None,
+ name=getattr(under, "name", None) if under_name else "target",
columns=getattr(under, "columns", None) if under_name else None,
dtype=under.dtypes if under_name else None,
)
diff --git a/atom/data_cleaning.py b/atom/data_cleaning.py
index 4bcdbca48..59390a36b 100644
--- a/atom/data_cleaning.py
+++ b/atom/data_cleaning.py
@@ -47,8 +47,9 @@
from atom.basetransformer import BaseTransformer
from atom.utils.constants import MISSING_VALUES
from atom.utils.types import (
- BOOL, DATAFRAME, DATAFRAME_TYPES, ENGINE, ESTIMATOR, FEATURES, FLOAT, INT,
- PANDAS, SCALAR, SEQUENCE, SEQUENCE_TYPES, SERIES_TYPES, TARGET,
+ BOOL, DATAFRAME, DATAFRAME_TYPES, DISCRETIZER_STRATS, ENGINE, ESTIMATOR,
+ FEATURES, FLOAT, INT, PANDAS, PRUNER_STRATS, SCALAR, SCALER_STRATS,
+ SEQUENCE, SEQUENCE_TYPES, SERIES_TYPES, STRAT_NUM, TARGET,
)
from atom.utils.utils import (
CustomDict, bk, check_is_fitted, composed, crash, get_cols, it, lst, merge,
@@ -1082,7 +1083,7 @@ class Discretizer(BaseEstimator, TransformerMixin, BaseTransformer):
def __init__(
self,
- strategy: str = "quantile",
+ strategy: DISCRETIZER_STRATS = "quantile",
*,
bins: INT | SEQUENCE | dict = 5,
labels: SEQUENCE | dict | None = None,
@@ -1151,12 +1152,6 @@ def get_labels(labels, bins):
self._check_n_features(X, reset=True)
self._num_cols = list(X.select_dtypes(include="number"))
- if self.strategy.lower() not in ("uniform", "quantile", "kmeans", "custom"):
- raise ValueError(
- f"Invalid value for the strategy parameter, got {self.strategy}. "
- "Choose from: uniform, quantile, kmeans, custom."
- )
-
self.log("Fitting Discretizer...", 1)
labels = {} if self.labels is None else self.labels
@@ -1173,7 +1168,7 @@ def get_labels(labels, bins):
else:
bins = self.bins
- if self.strategy.lower() != "custom":
+ if self.strategy != "custom":
if isinstance(bins, SEQUENCE_TYPES):
try:
bins = bins[i] # Fetch the i-th bin for the i-th column
@@ -1186,15 +1181,16 @@ def get_labels(labels, bins):
estimator = self._get_est_class("KBinsDiscretizer", "preprocessing")
- # cuML implementation has no random_state
+ # cuML implementation has no subsample and random_state
kwargs = {}
- if "random_state" in sign(estimator):
+ if "subsample" in sign(estimator):
+ kwargs["subsample"] = 200000
kwargs["random_state"] = self.random_state
self._discretizers[col] = estimator(
n_bins=bins,
encode="ordinal",
- strategy=self.strategy.lower(),
+ strategy=self.strategy,
**kwargs,
).fit(X[[col]])
@@ -1806,7 +1802,7 @@ class Imputer(BaseEstimator, TransformerMixin, BaseTransformer):
def __init__(
self,
- strat_num: SCALAR | Literal["drop", "mean", "knn", "most_frequent"] = "drop",
+ strat_num: STRAT_NUM = "drop",
strat_cat: Literal["drop", "most_frequent"] | str = "drop",
*,
max_nan_rows: SCALAR | None = None,
@@ -1853,12 +1849,6 @@ def fit(self, X: FEATURES, y: TARGET | None = None) -> Imputer:
self._num_cols = list(X.select_dtypes(include="number"))
# Check input Parameters
- strategies = ["drop", "mean", "median", "knn", "most_frequent"]
- if isinstance(self.strat_num, str) and self.strat_num.lower() not in strategies:
- raise ValueError(
- "Unknown strategy for the strat_num parameter, got "
- f"{self.strat_num}. Choose from: {', '.join(strategies)}."
- )
if self.max_nan_rows:
if self.max_nan_rows < 0:
raise ValueError(
@@ -1902,10 +1892,8 @@ def fit(self, X: FEATURES, y: TARGET | None = None) -> Imputer:
self._imputers = {}
# Load the imputer class from sklearn or cuml (different modules)
- estimator = self._get_est_class(
- name="SimpleImputer",
- module="preprocessing" if self.engine["estimator"] == "cuml" else "impute",
- )
+ module = "preprocessing" if self.engine.get("estimator") == "cuml" else "impute"
+ estimator = self._get_est_class("SimpleImputer", module)
# Assign an imputer to each column
for name, column in X.items():
@@ -2496,11 +2484,11 @@ class Pruner(BaseEstimator, TransformerMixin, BaseTransformer):
def __init__(
self,
- strategy: str | SEQUENCE = "zscore",
+ strategy: PRUNER_STRATS | SEQUENCE = "zscore",
*,
method: SCALAR | Literal["drop", "minmax"] = "drop",
max_sigma: SCALAR = 3,
- include_target: bool = False,
+ include_target: BOOL = False,
device: str = "cpu",
engine: ENGINE = {"data": "numpy", "estimator": "sklearn"},
verbose: Literal[0, 1, 2] = 0,
@@ -2800,8 +2788,8 @@ class Scaler(BaseEstimator, TransformerMixin, BaseTransformer):
def __init__(
self,
- strategy: str = "standard",
- include_binary: bool = False,
+ strategy: SCALER_STRATS = "standard",
+ include_binary: BOOL = False,
*,
device: str = "cpu",
engine: ENGINE = {"data": "numpy", "estimator": "sklearn"},
@@ -2853,14 +2841,8 @@ def fit(self, X: FEATURES, y: TARGET | None = None) -> Scaler:
robust="RobustScaler",
)
- if self.strategy in strategies:
- estimator = self._get_est_class(strategies[self.strategy], "preprocessing")
- self._estimator = estimator(**self.kwargs)
- else:
- raise ValueError(
- f"Invalid value for the strategy parameter, got {self.strategy}. "
- f"Choose from: {', '.join(strategies)}."
- )
+ estimator = self._get_est_class(strategies[self.strategy], "preprocessing")
+ self._estimator = estimator(**self.kwargs)
self.log("Fitting Scaler...", 1)
self._estimator.fit(X[self._num_cols])
diff --git a/atom/ensembles.py b/atom/ensembles.py
index 50cfec4f2..3763bd138 100644
--- a/atom/ensembles.py
+++ b/atom/ensembles.py
@@ -381,7 +381,7 @@ def fit(
X: FEATURES,
y: SEQUENCE,
sample_weight: SEQUENCE | None = None,
- ) -> VotingRegressor:
+ ) -> StackingClassifier:
"""Fit the estimators, skipping prefit ones.
Parameters
diff --git a/atom/feature_engineering.py b/atom/feature_engineering.py
index 3c5482582..8f4238032 100644
--- a/atom/feature_engineering.py
+++ b/atom/feature_engineering.py
@@ -13,7 +13,7 @@
from collections import defaultdict
from logging import Logger
from random import sample
-from typing import Callable, Literal
+from typing import Literal
import featuretools as ft
import joblib
@@ -36,10 +36,10 @@
from atom.basetransformer import BaseTransformer
from atom.data_cleaning import Scaler, TransformerMixin
from atom.models import MODELS
-from atom.plots import FeatureSelectorPlot
+from atom.plots import FeatureSelectionPlot
from atom.utils.types import (
- BOOL, DATAFRAME, ENGINE, FEATURES, FLOAT, INT, INT_TYPES, SCALAR, SEQUENCE,
- SEQUENCE_TYPES, SERIES_TYPES, TARGET,
+ BOOL, DATAFRAME, ENGINE, ESTIMATOR, FEATURES, FLOAT, INT, INT_TYPES,
+ SCALAR, SEQUENCE, SEQUENCE_TYPES, SERIES_TYPES, TARGET,
)
from atom.utils.utils import (
CustomDict, check_is_fitted, check_scaling, composed, crash,
@@ -844,7 +844,7 @@ class FeatureSelector(
BaseEstimator,
TransformerMixin,
BaseTransformer,
- FeatureSelectorPlot,
+ FeatureSelectionPlot,
):
"""Reduce the number of features in the data.
@@ -1118,7 +1118,7 @@ def __init__(
self,
strategy: str | None = None,
*,
- solver: str | Callable | None = None,
+ solver: str | ESTIMATOR | None = None,
n_features: SCALAR | None = None,
min_repeated: SCALAR | None = 2,
max_repeated: SCALAR | None = 1.0,
diff --git a/atom/models/__init__.py b/atom/models/__init__.py
new file mode 100644
index 000000000..54274dfa9
--- /dev/null
+++ b/atom/models/__init__.py
@@ -0,0 +1,208 @@
+# -*- coding: utf-8 -*-
+
+"""
+Automated Tool for Optimized Modelling (ATOM)
+Author: Mavs
+Description: Module for models.
+
+To add new models note the following:
+
+1. Add the class in the right file depending on task.
+2. Models are ordered alphabetically.
+3. Models have the following structure:
+
+ Class attributes
+ ----------------
+ acronym: str
+ Acronym of the model's name.
+
+ needs_scaling: bool
+ Whether the model needs scaled features.
+
+ accepts_sparse: bool
+ Whether the model has native support for sparse matrices.
+
+ native_multilabel: bool
+ Whether the model has native support for multilabel tasks.
+
+ native_multioutput: bool
+ Whether the model has native support for multioutput tasks.
+
+ has_validation: str or None
+ Whether the model allows in-training validation. If str,
+ name of the estimator's parameter that states the number
+ of iterations. If None, no support for in-training
+ validation.
+
+ supports_engines: list
+ Engines that can be used to run this model.
+
+ _module: str
+ Module from which to load the class. If the module lives
+ under an engine package, omit the engine prefix, e.g. use
+ "ensemble" instead of "sklearn.ensemble".
+
+ _estimators: CustomDict
+ Name of the estimators per goal.
+
+ Instance attributes
+ -------------------
+ name: str
+ Name of the model. Defaults to the same as the acronym
+ but can be different if the same model is called multiple
+ times. The name is assigned in the basemodel.py module.
+
+ Methods
+ -------
+ _get_parameters(self, trial) -> CustomDict:
+ Return the trial's suggestions with rounded decimals and
+ (optionally) custom changes to the params. Don't implement
+ if the parent's implementation is sufficient.
+
+ _trial_to_est(self, params) -> CustomDict:
+ Convert trial's hyperparameters to parameters for the
+ estimator. Only implement for models whose study params are
+ different from those for the estimator.
+
+ _fit_estimator(self, estimator, data, est_params_fit, validation, trial):
+ This method is called to fit the estimator. Implement only
+ to customize the fit.
+
+ _get_distributions(self) -> CustomDict:
+ Return a list of the hyperparameter distributions for
+ optimization.
+
+"""
+
+from atom.basemodel import ClassRegModel
+from atom.models.classreg import (
+ AdaBoost, AutomaticRelevanceDetermination, Bagging, BayesianRidge,
+ BernoulliNB, CatBoost, CategoricalNB, ComplementNB, DecisionTree, Dummy,
+ ElasticNet, ExtraTree, ExtraTrees, GaussianNB, GaussianProcess,
+ GradientBoostingMachine, HistGradientBoosting, HuberRegression,
+ KNearestNeighbors, Lasso, LeastAngleRegression, LightGBM,
+ LinearDiscriminantAnalysis, LinearSVM, LogisticRegression,
+ MultiLayerPerceptron, MultinomialNB, OrdinaryLeastSquares,
+ OrthogonalMatchingPursuit, PassiveAggressive, Perceptron,
+ QuadraticDiscriminantAnalysis, RadiusNearestNeighbors, RandomForest, Ridge,
+ StochasticGradientDescent, SupportVectorMachine, XGBoost,
+)
+from atom.models.ensembles import Stacking, Voting
+from atom.models.ts import (
+ ARIMA, ETS, AutoARIMA, ExponentialSmoothing, NaiveForecaster,
+ PolynomialTrend,
+)
+from atom.utils.types import PREDICTOR
+from atom.utils.utils import ClassMap
+
+
+# Available models
+MODELS = ClassMap(
+ AdaBoost,
+ ARIMA,
+ AutoARIMA,
+ AutomaticRelevanceDetermination,
+ Bagging,
+ BayesianRidge,
+ BernoulliNB,
+ CatBoost,
+ CategoricalNB,
+ ComplementNB,
+ DecisionTree,
+ Dummy,
+ ElasticNet,
+ ETS,
+ ExponentialSmoothing,
+ ExtraTree,
+ ExtraTrees,
+ GaussianNB,
+ GaussianProcess,
+ GradientBoostingMachine,
+ HuberRegression,
+ HistGradientBoosting,
+ KNearestNeighbors,
+ Lasso,
+ LeastAngleRegression,
+ LightGBM,
+ LinearDiscriminantAnalysis,
+ LinearSVM,
+ LogisticRegression,
+ MultiLayerPerceptron,
+ MultinomialNB,
+ NaiveForecaster,
+ OrdinaryLeastSquares,
+ OrthogonalMatchingPursuit,
+ PassiveAggressive,
+ Perceptron,
+ PolynomialTrend,
+ QuadraticDiscriminantAnalysis,
+ RadiusNearestNeighbors,
+ RandomForest,
+ Ridge,
+ StochasticGradientDescent,
+ SupportVectorMachine,
+ XGBoost,
+ key="acronym",
+)
+
+# Available ensembles
+ENSEMBLES = ClassMap(Stacking, Voting, key="acronym")
+
+# Available models + ensembles
+MODELS_ENSEMBLES = ClassMap(*MODELS, *ENSEMBLES, key="acronym")
+
+
+class CustomModel(ClassRegModel):
+ """Model with estimator provided by user."""
+
+ def __init__(self, **kwargs):
+ if callable(est := kwargs.pop("estimator")): # Estimator provided by the user
+ self._est = est
+ self._params = {}
+ else:
+ self._est = est.__class__
+ self._params = est.get_params() # Store the provided parameters
+
+ if hasattr(est, "name"):
+ name = est.name
+ else:
+ # If no name is provided, use the name of the class
+ name = self._fullname
+ if len(n := list(filter(str.isupper, name))) >= 2 and n not in MODELS:
+ name = "".join(n)
+
+ self.acronym = getattr(est, "acronym", name)
+ if not name.startswith(self.acronym):
+ raise ValueError(
+ f"The name ({name}) and acronym ({self.acronym}) of model "
+ f"{self._fullname} do not match. The name should start with "
+ f"the model's acronym."
+ )
+
+ self.needs_scaling = getattr(est, "needs_scaling", False)
+ self.native_multilabel = getattr(est, "native_multilabel", False)
+ self.native_multioutput = getattr(est, "native_multioutput", False)
+ self.has_validation = getattr(est, "has_validation", None)
+
+ super().__init__(name=name, **kwargs)
+
+ @property
+ def _fullname(self) -> str:
+ """Return the estimator's class name."""
+ return self._est_class.__name__
+
+ @property
+ def _est_class(self):
+ """Return the estimator's class."""
+ return self._est
+
+ def _get_est(self, **params) -> PREDICTOR:
+ """Get the model's estimator with unpacked parameters.
+
+ Returns
+ -------
+ PREDICTOR
+ Estimator instance.
+
+ """
+ return super()._get_est(**{**self._params, **params})
diff --git a/atom/models.py b/atom/models/classreg.py
similarity index 75%
rename from atom/models.py
rename to atom/models/classreg.py
index c29b6e52a..abb83c5bb 100644
--- a/atom/models.py
+++ b/atom/models/classreg.py
@@ -1,4081 +1,3254 @@
-# -*- coding: utf-8 -*-
-
-"""
-Automated Tool for Optimized Modelling (ATOM)
-Author: Mavs
-Description: Module containing all available models. The models are
- ordered alphabetically. Classes must have the following
- structure:
-
- Class attributes
- ----------------
- acronym: str
- Acronym of the model's name.
-
- needs_scaling: bool
- Whether the model needs scaled features.
-
- accepts_sparse: bool
- Whether the model has native support for sparse matrices.
-
- native_multilabel: bool
- Whether the model has native support for multilabel tasks.
-
- native_multioutput: bool
- Whether the model has native support for multioutput tasks.
-
- has_validation: str or None
- Whether the model allows in-training validation. If str,
- name of the estimator's parameter that states the number
- of iterations. If None, no support for in-training
- validation.
-
- supports_engines: list
- Engines that can be used to run this model.
-
- _module: str
- Module from which to load the class. If one of engines,
- ignore the engine name, i.e. use "ensemble" instead of
- "sklearn.ensemble".
-
- _estimators: CustomDict
- Name of the estimators per goal.
-
- Instance attributes
- -------------------
- name: str
- Name of the model. Defaults to the same as the acronym
- but can be different if the same model is called multiple
- times. The name is assigned in the basemodel.py module.
-
- Methods
- -------
- _get_parameters(self, x) -> CustomDict:
- Return the trial's suggestions with rounded decimals and
- (optionally) custom changes to the params. Don't implement
- if the parent's implementation is sufficient.
-
- _trial_to_est(self, params) -> CustomDict:
- Convert trial's hyperparameters to parameters for the
- estimator. Only implement for models whose study params are
- different than those for the estimator.
-
- _fit_estimator(self, estimator, data, est_params_fit, validation, trial):
- This method is called to fit the estimator. Implement only
- to customize the fit.
-
- _get_distributions(self) -> CustomDict:
- Return a list of the hyperparameter distributions for
- optimization.
-
-"""
-
-from __future__ import annotations
-
-import numpy as np
-from optuna.distributions import CategoricalDistribution as Cat
-from optuna.distributions import FloatDistribution as Float
-from optuna.distributions import IntDistribution as Int
-from optuna.exceptions import TrialPruned
-from optuna.integration import (
- CatBoostPruningCallback, LightGBMPruningCallback, XGBoostPruningCallback,
-)
-from optuna.trial import Trial
-
-from atom.basemodel import ClassRegModel, ForecastModel
-from atom.pipeline import Pipeline
-from atom.utils.types import DATAFRAME, PREDICTOR, SERIES
-from atom.utils.utils import (
- CatBMetric, ClassMap, CustomDict, LGBMetric, XGBMetric, sign,
-)
-
-
-# Custom models ==================================================== >>
-
-class CustomModel(ClassRegModel):
- """Model with estimator provided by user."""
-
- def __init__(self, **kwargs):
- if callable(est := kwargs.pop("estimator")): # Estimator provided by the user
- self._est = est
- self._params = {}
- else:
- self._est = est.__class__
- self._params = est.get_params() # Store the provided parameters
-
- if hasattr(est, "name"):
- name = est.name
- else:
- # If no name is provided, use the name of the class
- name = self._fullname
- if len(n := list(filter(str.isupper, name))) >= 2 and n not in MODELS:
- name = "".join(n)
-
- self.acronym = getattr(est, "acronym", name)
- if not name.startswith(self.acronym):
- raise ValueError(
- f"The name ({name}) and acronym ({self.acronym}) of model "
- f"{self._fullname} do not match. The name should start with "
- f"the model's acronym."
- )
-
- self.needs_scaling = getattr(est, "needs_scaling", False)
- self.native_multilabel = getattr(est, "native_multilabel", False)
- self.native_multioutput = getattr(est, "native_multioutput", False)
- self.has_validation = getattr(est, "has_validation", None)
-
- super().__init__(name=name, **kwargs)
-
- @property
- def _fullname(self) -> str:
- """Return the estimator's class name."""
- return self._est_class.__name__
-
- @property
- def _est_class(self):
- """Return the estimator's class."""
- return self._est
-
- def _get_est(self, **params) -> PREDICTOR:
- """Get the model's estimator with unpacked parameters.
-
- Returns
- -------
- PREDICTOR
- Estimator instance.
-
- """
- return super()._get_est(**{**self._params, **params})
-
-
-# Classification and Regression models ============================= >>
-
-class AdaBoost(ClassRegModel):
- """Adaptive Boosting (with decision tree as base estimator).
-
- AdaBoost is a meta-estimator that begins by fitting a
- classifier/regressor on the original dataset and then fits
- additional copies of the algorithm on the same dataset but where
- the weights of instances are adjusted according to the error of
- the current prediction.
-
- Corresponding estimators are:
-
- - [AdaBoostClassifier][] for classification tasks.
- - [AdaBoostRegressor][] for regression tasks.
-
- Read more in sklearn's [documentation][adabdocs].
-
- See Also
- --------
- atom.models:GradientBoostingMachine
- atom.models:RandomForest
- atom.models:XGBoost
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="AdaB", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "AdaB"
- needs_scaling = False
- accepts_sparse = True
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn"]
-
- _module = "ensemble"
- _estimators = CustomDict({"class": "AdaBoostClassifier", "reg": "AdaBoostRegressor"})
-
- def _get_distributions(self) -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- dist = CustomDict(
- n_estimators=Int(50, 500, step=10),
- learning_rate=Float(0.01, 10, log=True),
- )
-
- if self.goal == "class":
- dist["algorithm"] = Cat(["SAMME.R", "SAMME"])
- else:
- dist["loss"] = Cat(["linear", "square", "exponential"])
-
- return dist
-
-
-class AutomaticRelevanceDetermination(ClassRegModel):
- """Automatic Relevance Determination.
-
- Automatic Relevance Determination is very similar to
- [BayesianRidge][], but can lead to sparser coefficients. Fit the
- weights of a regression model, using an ARD prior. The weights of
- the regression model are assumed to be in Gaussian distributions.
-
- Corresponding estimators are:
-
- - [ARDRegression][] for regression tasks.
-
- Read more in sklearn's [documentation][arddocs].
-
- See Also
- --------
- atom.models:BayesianRidge
- atom.models:GaussianProcess
- atom.models:LeastAngleRegression
-
- Examples
- --------
- ```pycon
- from atom import ATOMRegressor
- from sklearn.datasets import fetch_california_housing
-
- X, y = fetch_california_housing(return_X_y=True)
-
- atom = ATOMRegressor(X, y, random_state=1)
- atom.run(models="ARD", metric="r2", verbose=2)
- ```
-
- """
-
- acronym = "ARD"
- needs_scaling = True
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn"]
-
- _module = "linear_model"
- _estimators = CustomDict({"reg": "ARDRegression"})
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- n_iter=Int(100, 1000, step=10),
- alpha_1=Float(1e-4, 1, log=True),
- alpha_2=Float(1e-4, 1, log=True),
- lambda_1=Float(1e-4, 1, log=True),
- lambda_2=Float(1e-4, 1, log=True),
- )
-
-
-class Bagging(ClassRegModel):
- """Bagging model (with decision tree as base estimator).
-
- Bagging uses an ensemble meta-estimator that fits base predictors
- on random subsets of the original dataset and then aggregate their
- individual predictions (either by voting or by averaging) to form a
- final prediction. Such a meta-estimator can typically be used as a
- way to reduce the variance of a black-box estimator by introducing
- randomization into its construction procedure and then making an
- ensemble out of it.
-
- Corresponding estimators are:
-
- - [BaggingClassifier][] for classification tasks.
- - [BaggingRegressor][] for regression tasks.
-
- Read more in sklearn's [documentation][bagdocs].
-
- See Also
- --------
- atom.models:DecisionTree
- atom.models:LogisticRegression
- atom.models:RandomForest
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="Bag", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "Bag"
- needs_scaling = False
- accepts_sparse = True
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn"]
-
- _module = "ensemble"
- _estimators = CustomDict({"class": "BaggingClassifier", "reg": "BaggingRegressor"})
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- n_estimators=Int(10, 500, step=10),
- max_samples=Float(0.5, 1.0, step=0.1),
- max_features=Float(0.5, 1.0, step=0.1),
- bootstrap=Cat([True, False]),
- bootstrap_features=Cat([True, False]),
- )
-
-
-class BayesianRidge(ClassRegModel):
- """Bayesian ridge regression.
-
- Bayesian regression techniques can be used to include regularization
- parameters in the estimation procedure: the regularization parameter
- is not set in a hard sense but tuned to the data at hand.
-
- Corresponding estimators are:
-
- - [BayesianRidge][bayesianridgeclass] for regression tasks.
-
- Read more in sklearn's [documentation][brdocs].
-
- See Also
- --------
- atom.models:AutomaticRelevanceDetermination
- atom.models:GaussianProcess
- atom.models:LeastAngleRegression
-
- Examples
- --------
- ```pycon
- from atom import ATOMRegressor
- from sklearn.datasets import fetch_california_housing
-
- X, y = fetch_california_housing(return_X_y=True)
-
- atom = ATOMRegressor(X, y, random_state=1)
- atom.run(models="BR", metric="r2", verbose=2)
- ```
-
- """
-
- acronym = "BR"
- needs_scaling = True
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn"]
-
- _module = "linear_model"
- _estimators = CustomDict({"reg": "BayesianRidge"})
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- n_iter=Int(100, 1000, step=10),
- alpha_1=Float(1e-4, 1, log=True),
- alpha_2=Float(1e-4, 1, log=True),
- lambda_1=Float(1e-4, 1, log=True),
- lambda_2=Float(1e-4, 1, log=True),
- )
-
-
-class BernoulliNB(ClassRegModel):
- """Bernoulli Naive Bayes.
-
- BernoulliNB implements the Naive Bayes algorithm for multivariate
- Bernoulli models. Like [MultinomialNB][], this classifier is
- suitable for discrete data. The difference is that while MNB works
- with occurrence counts, BNB is designed for binary/boolean features.
-
- Corresponding estimators are:
-
- - [BernoulliNB][bernoullinbclass] for classification tasks.
-
- Read more in sklearn's [documentation][bnbdocs].
-
- See Also
- --------
- atom.models:ComplementNB
- atom.models:CategoricalNB
- atom.models:MultinomialNB
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="BNB", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "BNB"
- needs_scaling = False
- accepts_sparse = True
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn", "cuml"]
-
- _module = "naive_bayes"
- _estimators = CustomDict({"class": "BernoulliNB"})
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- alpha=Float(0.01, 10, log=True),
- fit_prior=Cat([True, False]),
- )
-
-
-class CatBoost(ClassRegModel):
- """Cat Boosting Machine.
-
- CatBoost is a machine learning method based on gradient boosting
- over decision trees. Main advantages of CatBoost:
-
- - Superior quality when compared with other GBDT models on many
- datasets.
- - Best in class prediction speed.
-
- Corresponding estimators are:
-
- - [CatBoostClassifier][] for classification tasks.
- - [CatBoostRegressor][] for regression tasks.
-
- Read more in CatBoost's [documentation][catbdocs].
-
- !!! warning
- * CatBoost selects the weights achieved by the best evaluation
- on the test set after training. This means that, by default,
- there is some minor data leakage in the test set. Use the
- `use_best_model=False` parameter to avoid this behavior or use
- a [holdout set][data-sets] to evaluate the final estimator.
- * [In-training validation][] and [pruning][] are disabled when
- `#!python device="gpu"`.
-
- !!! note
- ATOM uses CatBoost's `n_estimators` parameter instead of
- `iterations` to indicate the number of trees to fit. This is
- done to have consistent naming with the [XGBoost][] and
- [LightGBM][] models.
-
- See Also
- --------
- atom.models:GradientBoostingMachine
- atom.models:LightGBM
- atom.models:XGBoost
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="CatB", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "CatB"
- needs_scaling = True
- accepts_sparse = True
- native_multilabel = False
- native_multioutput = False
- has_validation = "n_estimators"
- supports_engines = ["catboost"]
-
- _module = "catboost"
- _estimators = CustomDict({"class": "CatBoostClassifier", "reg": "CatBoostRegressor"})
-
- def _get_parameters(self, trial: Trial) -> CustomDict:
- """Get the trial's hyperparameters.
-
- Parameters
- ----------
- trial: [Trial][]
- Current trial.
-
- Returns
- -------
- CustomDict
- Trial's hyperparameters.
-
- """
- params = super()._get_parameters(trial)
-
- if self._get_param("bootstrap_type", params) == "Bernoulli":
- params.pop("bagging_temperature")
- elif self._get_param("bootstrap_type", params) == "Bayesian":
- params.pop("subsample")
-
- return params
-
- def _get_est(self, **params) -> PREDICTOR:
- """Get the estimator instance.
-
- Parameters
- ----------
- **params
- Unpacked hyperparameters for the estimator.
-
- Returns
- -------
- Predictor
- Estimator instance.
-
- """
- eval_metric = None
- if getattr(self, "_metric", None) and not self._gpu:
- eval_metric = CatBMetric(self._metric[0], task=self.task)
-
- return self._est_class(
- eval_metric=params.pop("eval_metric", eval_metric),
- train_dir=params.pop("train_dir", ""),
- allow_writing_files=params.pop("allow_writing_files", False),
- thread_count=params.pop("n_jobs", self.n_jobs),
- task_type=params.pop("task_type", "GPU" if self._gpu else "CPU"),
- devices=str(self._device_id),
- verbose=params.pop("verbose", False),
- random_state=params.pop("random_state", self.random_state),
- **params,
- )
-
- def _fit_estimator(
- self,
- estimator: PREDICTOR,
- data: tuple[DATAFRAME, SERIES],
- est_params_fit: dict,
- validation: tuple[DATAFRAME, SERIES] | None = None,
- trial: Trial | None = None,
- ):
- """Fit the estimator and perform in-training validation.
-
- Parameters
- ----------
- estimator: Predictor
- Instance to fit.
-
- data: tuple
- Training data of the form (X, y).
-
- est_params_fit: dict
- Additional parameters for the estimator's fit method.
-
- validation: tuple or None
- Validation data of the form (X, y). If None, no validation
- is performed.
-
- trial: [Trial][] or None
- Active trial (during hyperparameter tuning).
-
- Returns
- -------
- Predictor
- Fitted instance.
-
- """
- params = est_params_fit.copy()
-
- callbacks = params.pop("callbacks", [])
- if trial and len(self._metric) == 1 and not self._gpu:
- callbacks.append(cb := CatBoostPruningCallback(trial, "CatBMetric"))
-
- # gpu implementation fails if callbacks!=None
- estimator.fit(*data, eval_set=validation, callbacks=callbacks or None, **params)
-
- if not self._gpu:
- if validation:
- # Create evals attribute with train and validation scores
- m = self._metric[0].name
- evals = estimator.evals_result_
- self._evals[f"{m}_train"] = evals["learn"]["CatBMetric"]
- self._evals[f"{m}_test"] = evals["validation"]["CatBMetric"]
-
- if trial and len(self._metric) == 1 and cb._pruned:
- # Add the pruned step to the output
- step = len(self.evals[f'{m}_train'])
- steps = estimator.get_params()[self.has_validation]
- trial.params[self.has_validation] = f"{step}/{steps}"
-
- trial.set_user_attr("estimator", estimator)
- raise TrialPruned(cb._message)
-
- return estimator
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- n_estimators=Int(20, 500, step=10),
- learning_rate=Float(0.01, 1.0, log=True),
- max_depth=Cat([None, *range(1, 17)]),
- min_child_samples=Int(1, 30),
- bootstrap_type=Cat(["Bayesian", "Bernoulli"]),
- bagging_temperature=Float(0, 10),
- subsample=Float(0.5, 1.0, step=0.1),
- reg_lambda=Float(0.001, 100, log=True),
- )
-
-
-class CategoricalNB(ClassRegModel):
- """Categorical Naive Bayes.
-
- Categorical Naive Bayes implements the Naive Bayes algorithm for
- categorical features.
-
- Corresponding estimators are:
-
- - [CategoricalNB][categoricalnbclass] for classification tasks.
-
- Read more in sklearn's [documentation][catnbdocs].
-
- See Also
- --------
- atom.models:BernoulliNB
- atom.models:ComplementNB
- atom.models:GaussianNB
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- import numpy as np
-
- X = np.random.randint(5, size=(100, 100))
- y = np.random.randint(2, size=100)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="CatNB", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "CatNB"
- needs_scaling = False
- accepts_sparse = True
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn", "cuml"]
-
- _module = "naive_bayes"
- _estimators = CustomDict({"class": "CategoricalNB"})
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- alpha=Float(0.01, 10, log=True),
- fit_prior=Cat([True, False]),
- )
-
-
-class ComplementNB(ClassRegModel):
- """Complement Naive Bayes.
-
- The Complement Naive Bayes classifier was designed to correct the
- "severe assumptions" made by the standard [MultinomialNB][]
- classifier. It is particularly suited for imbalanced datasets.
-
- Corresponding estimators are:
-
- - [ComplementNB][complementnbclass] for classification tasks.
-
- Read more in sklearn's [documentation][cnbdocs].
-
- See Also
- --------
- atom.models:BernoulliNB
- atom.models:CategoricalNB
- atom.models:MultinomialNB
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="CNB", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "CNB"
- needs_scaling = False
- accepts_sparse = True
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn", "cuml"]
-
- _module = "naive_bayes"
- _estimators = CustomDict({"class": "ComplementNB"})
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- alpha=Float(0.01, 10, log=True),
- fit_prior=Cat([True, False]),
- norm=Cat([True, False]),
- )
-
-
-class DecisionTree(ClassRegModel):
- """Single Decision Tree.
-
- A single decision tree classifier/regressor.
-
- Corresponding estimators are:
-
- - [DecisionTreeClassifier][] for classification tasks.
- - [DecisionTreeRegressor][] for regression tasks.
-
- Read more in sklearn's [documentation][treedocs].
-
- See Also
- --------
- atom.models:ExtraTree
- atom.models:ExtraTrees
- atom.models:RandomForest
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="Tree", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "Tree"
- needs_scaling = False
- accepts_sparse = True
- native_multilabel = True
- native_multioutput = True
- has_validation = None
- supports_engines = ["sklearn"]
-
- _module = "tree"
- _estimators = CustomDict(
- {"class": "DecisionTreeClassifier", "reg": "DecisionTreeRegressor"}
- )
-
- def _get_distributions(self) -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- if self.goal == "class":
- criterion = ["gini", "entropy"]
- else:
- criterion = ["squared_error", "absolute_error", "friedman_mse", "poisson"]
-
- return CustomDict(
- criterion=Cat(criterion),
- splitter=Cat(["best", "random"]),
- max_depth=Cat([None, *range(1, 17)]),
- min_samples_split=Int(2, 20),
- min_samples_leaf=Int(1, 20),
- max_features=Cat([None, "sqrt", "log2", 0.5, 0.6, 0.7, 0.8, 0.9]),
- ccp_alpha=Float(0, 0.035, step=0.005),
- )
-
-
-class Dummy(ClassRegModel):
- """Dummy classifier/regressor.
-
- When doing supervised learning, a simple sanity check consists of
- comparing one's estimator against simple rules of thumb. The
- prediction methods completely ignore the input data. Do not use
- this model for real problems. Use it only as a simple baseline
- to compare with other models.
-
- Corresponding estimators are:
-
- - [DummyClassifier][] for classification tasks.
- - [DummyRegressor][] for regression tasks.
-
- Read more in sklearn's [documentation][dummydocs].
-
- See Also
- --------
- atom.models:DecisionTree
- atom.models:ExtraTree
- atom.models:NaiveForecaster
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="Dummy", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "Dummy"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn"]
-
- _module = "dummy"
- _estimators = CustomDict({"class": "DummyClassifier", "reg": "DummyRegressor"})
-
- def _get_parameters(self, trial: Trial) -> CustomDict:
- """Get the trial's hyperparameters.
-
- Parameters
- ----------
- trial: [Trial][]
- Current trial.
-
- Returns
- -------
- CustomDict
- Trial's hyperparameters.
-
- """
- params = super()._get_parameters(trial)
-
- if self._get_param("strategy", params) != "quantile":
- params.pop("quantile")
-
- return params
-
- def _get_distributions(self) -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- dist = CustomDict()
- if self.goal == "class":
- dist["strategy"] = Cat(["most_frequent", "prior", "stratified", "uniform"])
- else:
- dist["strategy"] = Cat(["mean", "median", "quantile"])
- dist["quantile"] = Float(0, 1.0, step=0.1)
-
- return dist
-
-
-class ElasticNet(ClassRegModel):
- """Linear Regression with elasticnet regularization.
-
- Linear least squares with l1 and l2 regularization.
-
- Corresponding estimators are:
-
- - [ElasticNet][elasticnetreg] for regression tasks.
-
- Read more in sklearn's [documentation][endocs].
-
- See Also
- --------
- atom.models:Lasso
- atom.models:OrdinaryLeastSquares
- atom.models:Ridge
-
- Examples
- --------
- ```pycon
- from atom import ATOMRegressor
- from sklearn.datasets import fetch_california_housing
-
- X, y = fetch_california_housing(return_X_y=True)
-
- atom = ATOMRegressor(X, y, random_state=1)
- atom.run(models="EN", metric="r2", verbose=2)
- ```
-
- """
-
- acronym = "EN"
- needs_scaling = True
- accepts_sparse = True
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn", "sklearnex", "cuml"]
-
- _module = "linear_model"
- _estimators = CustomDict({"reg": "ElasticNet"})
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- alpha=Float(1e-3, 10, log=True),
- l1_ratio=Float(0.1, 0.9, step=0.1),
- selection=Cat(["cyclic", "random"]),
- )
-
-
-class ExtraTree(ClassRegModel):
- """Extremely Randomized Tree.
-
- Extra-trees differ from classic decision trees in the way they are
- built. When looking for the best split to separate the samples of a
- node into two groups, random splits are drawn for each of the
- max_features randomly selected features and the best split among
- those is chosen. When max_features is set 1, this amounts to
- building a totally random decision tree.
-
- Corresponding estimators are:
-
- - [ExtraTreeClassifier][] for classification tasks.
- - [ExtraTreeRegressor][] for regression tasks.
-
- Read more in sklearn's [documentation][treedocs].
-
- See Also
- --------
- atom.models:DecisionTree
- atom.models:ExtraTrees
- atom.models:RandomForest
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="ETree", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "ETree"
- needs_scaling = False
- accepts_sparse = True
- native_multilabel = True
- native_multioutput = True
- has_validation = None
- supports_engines = ["sklearn"]
-
- _module = "tree"
- _estimators = CustomDict(
- {"class": "ExtraTreeClassifier", "reg": "ExtraTreeRegressor"}
- )
-
- def _get_parameters(self, trial: Trial) -> CustomDict:
- """Get the trial's hyperparameters.
-
- Parameters
- ----------
- trial: [Trial][]
- Current trial.
-
- Returns
- -------
- CustomDict
- Trial's hyperparameters.
-
- """
- params = super()._get_parameters(trial)
-
- if not self._get_param("bootstrap", params):
- params.pop("max_samples")
-
- return params
-
- def _get_distributions(self) -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- if self.goal == "class":
- criterion = ["gini", "entropy"]
- else:
- criterion = ["squared_error", "absolute_error"]
-
- return CustomDict(
- criterion=Cat(criterion),
- splitter=Cat(["random", "best"]),
- max_depth=Cat([None, *range(1, 17)]),
- min_samples_split=Int(2, 20),
- min_samples_leaf=Int(1, 20),
- max_features=Cat([None, "sqrt", "log2", 0.5, 0.6, 0.7, 0.8, 0.9]),
- ccp_alpha=Float(0, 0.035, step=0.005),
- )
-
-
-class ExtraTrees(ClassRegModel):
- """Extremely Randomized Trees.
-
- Extra-Trees use a meta estimator that fits a number of randomized
- decision trees (a.k.a. [extra-trees][extratree]) on various
- sub-samples of the dataset and uses averaging to improve the
- predictive accuracy and control over-fitting.
-
- Corresponding estimators are:
-
- - [ExtraTreesClassifier][] for classification tasks.
- - [ExtraTreesRegressor][] for regression tasks.
-
- Read more in sklearn's [documentation][etdocs].
-
- See Also
- --------
- atom.models:DecisionTree
- atom.models:ExtraTree
- atom.models:RandomForest
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="ET", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "ET"
- needs_scaling = False
- accepts_sparse = True
- native_multilabel = True
- native_multioutput = True
- has_validation = None
- supports_engines = ["sklearn"]
-
- _module = "ensemble"
- _estimators = CustomDict(
- {"class": "ExtraTreesClassifier", "reg": "ExtraTreesRegressor"}
- )
-
- def _get_parameters(self, trial: Trial) -> CustomDict:
- """Get the trial's hyperparameters.
-
- Parameters
- ----------
- trial: [Trial][]
- Current trial.
-
- Returns
- -------
- CustomDict
- Trial's hyperparameters.
-
- """
- params = super()._get_parameters(trial)
-
- if not self._get_param("bootstrap", params):
- params.pop("max_samples")
-
- return params
-
- def _get_distributions(self) -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- if self.goal == "class":
- criterion = ["gini", "entropy"]
- else:
- criterion = ["squared_error", "absolute_error"]
-
- return CustomDict(
- n_estimators=Int(10, 500, step=10),
- criterion=Cat(criterion),
- max_depth=Cat([None, *range(1, 17)]),
- min_samples_split=Int(2, 20),
- min_samples_leaf=Int(1, 20),
- max_features=Cat([None, "sqrt", "log2", 0.5, 0.6, 0.7, 0.8, 0.9]),
- bootstrap=Cat([True, False]),
- max_samples=Cat([None, 0.5, 0.6, 0.7, 0.8, 0.9]),
- ccp_alpha=Float(0, 0.035, step=0.005),
- )
-
-
-class GaussianNB(ClassRegModel):
- """Gaussian Naive Bayes.
-
- Gaussian Naive Bayes implements the Naive Bayes algorithm for
- classification. The likelihood of the features is assumed to
- be Gaussian.
-
- Corresponding estimators are:
-
- - [GaussianNB][gaussiannbclass] for classification tasks.
-
- Read more in sklearn's [documentation][gnbdocs].
-
- See Also
- --------
- atom.models:BernoulliNB
- atom.models:CategoricalNB
- atom.models:ComplementNB
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="GNB", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "GNB"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn", "cuml"]
-
- _module = "naive_bayes"
- _estimators = CustomDict({"class": "GaussianNB"})
-
-
-class GaussianProcess(ClassRegModel):
- """Gaussian process.
-
- Gaussian Processes are a generic supervised learning method
- designed to solve regression and probabilistic classification
- problems. The advantages of Gaussian processes are:
-
- * The prediction interpolates the observations.
- * The prediction is probabilistic (Gaussian) so that one can compute
- empirical confidence intervals and decide based on those if one
- should refit (online fitting, adaptive fitting) the prediction in
- some region of interest.
-
- The disadvantages of Gaussian processes include:
-
- * They are not sparse, i.e. they use the whole samples/features
- information to perform the prediction.
- * They lose efficiency in high dimensional spaces, namely when the
- number of features exceeds a few dozens.
-
- Corresponding estimators are:
-
- - [GaussianProcessClassifier][] for classification tasks.
- - [GaussianProcessRegressor][] for regression tasks.
-
- Read more in sklearn's [documentation][gpdocs].
-
- See Also
- --------
- atom.models:GaussianNB
- atom.models:LinearDiscriminantAnalysis
- atom.models:PassiveAggressive
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="GP", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "GP"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn"]
-
- _module = "gaussian_process"
- _estimators = CustomDict(
- {"class": "GaussianProcessClassifier", "reg": "GaussianProcessRegressor"}
- )
-
-
-class GradientBoostingMachine(ClassRegModel):
- """Gradient Boosting Machine.
-
- A Gradient Boosting Machine builds an additive model in a forward
- stage-wise fashion; it allows for the optimization of arbitrary
- differentiable loss functions. In each stage `n_classes_` regression
- trees are fit on the negative gradient of the loss function, e.g.
- binary or multiclass log loss. Binary classification is a special
- case where only a single regression tree is induced.
-
- Corresponding estimators are:
-
- - [GradientBoostingClassifier][] for classification tasks.
- - [GradientBoostingRegressor][] for regression tasks.
-
- Read more in sklearn's [documentation][gbmdocs].
-
- !!! tip
- [HistGradientBoosting][] is a much faster variant of this
- algorithm for intermediate datasets (n_samples >= 10k).
-
- See Also
- --------
- atom.models:CatBoost
- atom.models:HistGradientBoosting
- atom.models:LightGBM
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="GBM", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "GBM"
- needs_scaling = False
- accepts_sparse = True
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn"]
-
- _module = "ensemble"
- _estimators = CustomDict(
- {"class": "GradientBoostingClassifier", "reg": "GradientBoostingRegressor"}
- )
-
- def _get_parameters(self, trial: Trial) -> CustomDict:
- """Get the trial's hyperparameters.
-
- Parameters
- ----------
- trial: [Trial][]
- Current trial.
-
- Returns
- -------
- CustomDict
- Trial's hyperparameters.
-
- """
- params = super()._get_parameters(trial)
-
- if self._get_param("loss", params) not in ("huber", "quantile"):
- params.pop("alpha")
-
- return params
-
- def _get_distributions(self) -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- dist = CustomDict(
- loss=Cat(["log_loss", "exponential"]),
- learning_rate=Float(0.01, 1.0, log=True),
- n_estimators=Int(10, 500, step=10),
- subsample=Float(0.5, 1.0, step=0.1),
- criterion=Cat(["friedman_mse", "squared_error"]),
- min_samples_split=Int(2, 20),
- min_samples_leaf=Int(1, 20),
- max_depth=Int(1, 21),
- max_features=Cat([None, "sqrt", "log2", 0.5, 0.6, 0.7, 0.8, 0.9]),
- ccp_alpha=Float(0, 0.035, step=0.005),
- )
-
- if self.task.startswith("multiclass"):
- dist.pop("loss") # Multiclass only supports log_loss
- elif self.goal.startswith("reg"):
- dist["loss"] = Cat(["squared_error", "absolute_error", "huber", "quantile"])
- dist["alpha"] = Float(0.1, 0.9, step=0.1)
-
- return dist
-
-
-class HuberRegression(ClassRegModel):
- """Huber regressor.
-
- Huber is a linear regression model that is robust to outliers. It
- makes sure that the loss function is not heavily influenced by the
- outliers while not completely ignoring their effect.
-
- Corresponding estimators are:
-
- - [HuberRegressor][] for regression tasks.
-
- Read more in sklearn's [documentation][huberdocs].
-
- See Also
- --------
- atom.models:AutomaticRelevanceDetermination
- atom.models:LeastAngleRegression
- atom.models:OrdinaryLeastSquares
-
- Examples
- --------
- ```pycon
- from atom import ATOMRegressor
- from sklearn.datasets import fetch_california_housing
-
- X, y = fetch_california_housing(return_X_y=True)
-
- atom = ATOMRegressor(X, y, random_state=1)
- atom.run(models="Huber", metric="r2", verbose=2)
- ```
-
- """
-
- acronym = "Huber"
- needs_scaling = True
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn"]
-
- _module = "linear_model"
- _estimators = CustomDict({"reg": "HuberRegressor"})
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- epsilon=Float(1, 10, log=True),
- max_iter=Int(50, 500, step=10),
- alpha=Float(1e-4, 1, log=True),
- )
-
-
-class HistGradientBoosting(ClassRegModel):
- """Histogram-based Gradient Boosting Machine.
-
- This Histogram-based Gradient Boosting Machine is much faster than
- the standard [GradientBoostingMachine][] for big datasets
- (n_samples>=10k). This variation first bins the input samples into
- integer-valued bins which tremendously reduces the number of
- splitting points to consider, and allows the algorithm to leverage
- integer-based data structures (histograms) instead of relying on
- sorted continuous values when building the trees.
-
- Corresponding estimators are:
-
- - [HistGradientBoostingClassifier][] for classification tasks.
- - [HistGradientBoostingRegressor][] for regression tasks.
-
- Read more in sklearn's [documentation][hgbmdocs].
-
- See Also
- --------
- atom.models:CatBoost
- atom.models:GradientBoostingMachine
- atom.models:XGBoost
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="hGBM", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "hGBM"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn"]
-
- _module = "ensemble"
- _estimators = CustomDict(
- {
- "class": "HistGradientBoostingClassifier",
- "reg": "HistGradientBoostingRegressor",
- }
- )
-
- def _get_distributions(self) -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- dist = CustomDict(
- loss=Cat(["squared_error", "absolute_error", "poisson", "quantile", "gamma"]),
- learning_rate=Float(0.01, 1.0, log=True),
- max_iter=Int(10, 500, step=10),
- max_leaf_nodes=Int(10, 50),
- max_depth=Cat([None, *range(1, 17)]),
- min_samples_leaf=Int(10, 30),
- l2_regularization=Float(0, 1.0, step=0.1),
- )
-
- if self.goal == "class":
- dist.pop("loss")
-
- return dist
-
-
-class KNearestNeighbors(ClassRegModel):
- """K-Nearest Neighbors.
-
- K-Nearest Neighbors, as the name clearly indicates, implements the
- k-nearest neighbors vote. For regression, the target is predicted
- by local interpolation of the targets associated of the nearest
- neighbors in the training set.
-
- Corresponding estimators are:
-
- - [KNeighborsClassifier][] for classification tasks.
- - [KNeighborsRegressor][] for classification tasks.
-
- Read more in sklearn's [documentation][knndocs].
-
- See Also
- --------
- atom.models:LinearDiscriminantAnalysis
- atom.models:QuadraticDiscriminantAnalysis
- atom.models:RadiusNearestNeighbors
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="KNN", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "KNN"
- needs_scaling = True
- accepts_sparse = True
- native_multilabel = True
- native_multioutput = True
- has_validation = None
- supports_engines = ["sklearn", "sklearnex", "cuml"]
-
- _module = "neighbors"
- _estimators = CustomDict(
- {"class": "KNeighborsClassifier", "reg": "KNeighborsRegressor"}
- )
-
- def _get_distributions(self) -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- dist = CustomDict(
- n_neighbors=Int(1, 100),
- weights=Cat(["uniform", "distance"]),
- algorithm=Cat(["auto", "ball_tree", "kd_tree", "brute"]),
- leaf_size=Int(20, 40),
- p=Int(1, 2),
- )
-
- if self._gpu:
- dist.pop("algorithm") # Only 'brute' is supported
- if self.engine["estimator"] == "cuml":
- dist.pop("weights") # Only 'uniform' is supported
- dist.pop("leaf_size")
- dist.pop("p")
-
- return dist
-
-
-class Lasso(ClassRegModel):
- """Linear Regression with lasso regularization.
-
- Linear least squares with l1 regularization.
-
- Corresponding estimators are:
-
- - [Lasso][lassoreg] for regression tasks.
-
- Read more in sklearn's [documentation][lassodocs].
-
- See Also
- --------
- atom.models:ElasticNet
- atom.models:OrdinaryLeastSquares
- atom.models:Ridge
-
- Examples
- --------
- ```pycon
- from atom import ATOMRegressor
- from sklearn.datasets import fetch_california_housing
-
- X, y = fetch_california_housing(return_X_y=True)
-
- atom = ATOMRegressor(X, y, random_state=1)
- atom.run(models="Lasso", metric="r2", verbose=2)
- ```
-
- """
-
- acronym = "Lasso"
- needs_scaling = True
- accepts_sparse = True
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn", "sklearnex", "cuml"]
-
- _module = "linear_model"
- _estimators = CustomDict({"reg": "Lasso"})
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- alpha=Float(1e-3, 10, log=True),
- selection=Cat(["cyclic", "random"]),
- )
-
-
-class LeastAngleRegression(ClassRegModel):
- """Least Angle Regression.
-
- Least-Angle Regression is a regression algorithm for
- high-dimensional data. Lars is similar to forward stepwise
- regression. At each step, it finds the feature most correlated
- with the target. When there are multiple features having equal
- correlation, instead of continuing along the same feature, it
- proceeds in a direction equiangular between the features.
-
- Corresponding estimators are:
-
- - [Lars][] for regression tasks.
-
- Read more in sklearn's [documentation][larsdocs].
-
- See Also
- --------
- atom.models:BayesianRidge
- atom.models:HuberRegression
- atom.models:OrdinaryLeastSquares
-
- Examples
- --------
- ```pycon
- from atom import ATOMRegressor
- from sklearn.datasets import fetch_california_housing
-
- X, y = fetch_california_housing(return_X_y=True)
-
- atom = ATOMRegressor(X, y, random_state=1)
- atom.run(models="Lars", metric="r2", verbose=2)
- ```
-
- """
-
- acronym = "Lars"
- needs_scaling = True
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn"]
-
- _module = "linear_model"
- _estimators = CustomDict({"reg": "Lars"})
-
-
-class LightGBM(ClassRegModel):
- """Light Gradient Boosting Machine.
-
- LightGBM is a gradient boosting model that uses tree based learning
- algorithms. It is designed to be distributed and efficient with the
- following advantages:
-
- - Faster training speed and higher efficiency.
- - Lower memory usage.
- - Better accuracy.
- - Capable of handling large-scale data.
-
- Corresponding estimators are:
-
- - [LGBMClassifier][] for classification tasks.
- - [LGBMRegressor][] for regression tasks.
-
- Read more in LightGBM's [documentation][lgbdocs].
-
- !!! info
- Using LightGBM's [GPU acceleration][estimator-acceleration]
- requires [additional software dependencies][lgb_gpu].
-
- See Also
- --------
- atom.models:CatBoost
- atom.models:GradientBoostingMachine
- atom.models:XGBoost
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="LGB", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "LGB"
- needs_scaling = True
- accepts_sparse = True
- native_multilabel = False
- native_multioutput = False
- has_validation = "n_estimators"
- supports_engines = ["lightgbm"]
-
- _module = "lightgbm.sklearn"
- _estimators = CustomDict({"class": "LGBMClassifier", "reg": "LGBMRegressor"})
-
- def _get_est(self, **params) -> PREDICTOR:
- """Get the model's estimator with unpacked parameters.
-
- Returns
- -------
- Predictor
- Estimator instance.
-
- """
- # Custom lightgbm mapping for warnings
- # PYTHONWARNINGS doesn't work since they go from C/C++ code to stdout
- warns = dict(always=2, default=1, error=0, ignore=-1)
-
- return self._est_class(
- verbose=params.pop("verbose", warns.get(self.warnings, -1)),
- n_jobs=params.pop("n_jobs", self.n_jobs),
- device=params.pop("device", "gpu" if self._gpu else "cpu"),
- gpu_device_id=params.pop("gpu_device_id", self._device_id or -1),
- random_state=params.pop("random_state", self.random_state),
- **params,
- )
-
- def _fit_estimator(
- self,
- estimator: PREDICTOR,
- data: tuple[DATAFRAME, SERIES],
- est_params_fit: dict,
- validation: tuple[DATAFRAME, SERIES] | None = None,
- trial: Trial | None = None,
- ):
- """Fit the estimator and perform in-training validation.
-
- Parameters
- ----------
- estimator: Predictor
- Instance to fit.
-
- data: tuple
- Training data of the form (X, y).
-
- est_params_fit: dict
- Additional parameters for the estimator's fit method.
-
- validation: tuple or None
- Validation data of the form (X, y). If None, no validation
- is performed.
-
- trial: [Trial][] or None
- Active trial (during hyperparameter tuning).
-
- Returns
- -------
- Predictor
- Fitted instance.
-
- """
- from lightgbm.callback import log_evaluation
-
- m = self._metric[0].name
- params = est_params_fit.copy()
-
- callbacks = params.pop("callbacks", []) + [log_evaluation(-1)]
- if trial and len(self._metric) == 1:
- callbacks.append(LightGBMPruningCallback(trial, m, "valid_1"))
-
- eval_metric = None
- if getattr(self, "_metric", None):
- eval_metric = LGBMetric(self._metric[0], task=self.task)
-
- try:
- estimator.fit(
- *data,
- eval_set=[data, validation] if validation else None,
- eval_metric=params.pop("eval_metric", eval_metric),
- callbacks=callbacks,
- **params,
- )
- except TrialPruned as ex:
- # Add the pruned step to the output
- step = str(ex).split(" ")[-1][:-1]
- steps = estimator.get_params()[self.has_validation]
- trial.params[self.has_validation] = f"{step}/{steps}"
-
- trial.set_user_attr("estimator", estimator)
- raise ex
-
- if validation:
- # Create evals attribute with train and validation scores
- self._evals[f"{m}_train"] = estimator.evals_result_["training"][m]
- self._evals[f"{m}_test"] = estimator.evals_result_["valid_1"][m]
-
- return estimator
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- n_estimators=Int(20, 500, step=10),
- learning_rate=Float(0.01, 1.0, log=True),
- max_depth=Int(-1, 17, step=2),
- num_leaves=Int(20, 40),
- min_child_weight=Float(1e-4, 100, log=True),
- min_child_samples=Int(1, 30),
- subsample=Float(0.5, 1.0, step=0.1),
- colsample_bytree=Float(0.4, 1.0, step=0.1),
- reg_alpha=Float(1e-4, 100, log=True),
- reg_lambda=Float(1e-4, 100, log=True),
- )
-
-
-class LinearDiscriminantAnalysis(ClassRegModel):
- """Linear Discriminant Analysis.
-
- Linear Discriminant Analysis is a classifier with a linear
- decision boundary, generated by fitting class conditional densities
- to the data and using Bayes’ rule. The model fits a Gaussian
- density to each class, assuming that all classes share the same
- covariance matrix.
-
- Corresponding estimators are:
-
- - [LinearDiscriminantAnalysis][ldaclassifier] for classification tasks.
-
- Read more in sklearn's [documentation][ldadocs].
-
- See Also
- --------
- atom.models:LogisticRegression
- atom.models:RadiusNearestNeighbors
- atom.models:QuadraticDiscriminantAnalysis
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="LDA", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "LDA"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = False
- has_validation = None
- supports_engines = ["sklearn"]
-
- _module = "discriminant_analysis"
- _estimators = CustomDict({"class": "LinearDiscriminantAnalysis"})
-
- def _get_parameters(self, trial: Trial) -> CustomDict:
- """Get the trial's hyperparameters.
-
- Parameters
- ----------
- trial: [Trial][]
- Current trial.
-
- Returns
- -------
- CustomDict
- Trial's hyperparameters.
-
- """
- params = super()._get_parameters(trial)
-
- if self._get_param("solver", params) == "svd":
- params.pop("shrinkage")
-
- return params
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- solver=Cat(["svd", "lsqr", "eigen"]),
- shrinkage=Cat([None, "auto", 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]),
- )
-
-
class LinearSVM(ClassRegModel):
    """Linear Support Vector Machine.

    Similar to [SupportVectorMachine][] but with a linear kernel.
    Implemented in terms of liblinear rather than libsvm, so it has
    more flexibility in the choice of penalties and loss functions and
    should scale better to large numbers of samples.

    Corresponding estimators are:

    - [LinearSVC][] for classification tasks.
    - [LinearSVR][] for regression tasks.

    Read more in sklearn's [documentation][svmdocs].

    See Also
    --------
    atom.models:KNearestNeighbors
    atom.models:StochasticGradientDescent
    atom.models:SupportVectorMachine

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="lSVM", metric="f1", verbose=2)
    ```

    """

    acronym = "lSVM"
    needs_scaling = True
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn", "cuml"]

    _module = "svm"
    _estimators = CustomDict({"class": "LinearSVC", "reg": "LinearSVR"})

    def _get_parameters(self, trial: Trial) -> CustomDict:
        """Get the trial's hyperparameters.

        Drops or overwrites sampled values that form invalid
        penalty/loss/dual combinations for liblinear.

        Parameters
        ----------
        trial: [Trial][]
            Current trial.

        Returns
        -------
        CustomDict
            Trial's hyperparameters.

        """
        params = super()._get_parameters(trial)

        if self.goal == "class":
            if self._get_param("loss", params) == "hinge":
                # l1 regularization can't be combined with hinge
                params.replace_value("penalty", "l2")
                # l2 regularization can't be combined with hinge when dual=False
                params.replace_value("dual", True)
            elif self._get_param("loss", params) == "squared_hinge":
                # l1 regularization can't be combined with squared_hinge when dual=True
                if self._get_param("penalty", params) == "l1":
                    params.replace_value("dual", False)
        elif self._get_param("loss", params) == "epsilon_insensitive":
            # LinearSVR only supports dual=True with this loss
            params.replace_value("dual", True)

        return params

    def _get_est(self, **params) -> PREDICTOR:
        """Get the estimator instance.

        Parameters
        ----------
        **params
            Unpacked hyperparameters for the estimator.

        Returns
        -------
        Predictor
            Estimator instance.

        """
        if self.engine["estimator"] == "cuml" and self.goal == "class":
            # cuML's LinearSVC requires probability=True to enable predict_proba
            return self._est_class(probability=params.pop("probability", True), **params)
        else:
            return super()._get_est(**params)

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict()
        if self.goal == "class":
            dist["penalty"] = Cat(["l1", "l2"])
            dist["loss"] = Cat(["hinge", "squared_hinge"])
        else:
            dist["loss"] = Cat(["epsilon_insensitive", "squared_epsilon_insensitive"])

        dist["C"] = Float(1e-3, 100, log=True)
        dist["dual"] = Cat([True, False])

        if self.engine["estimator"] == "cuml":
            # cuML's implementation has no dual parameter
            dist.pop("dual")

        return dist
-
-
class LogisticRegression(ClassRegModel):
    """Logistic Regression.

    Logistic regression, despite its name, is a linear model for
    classification rather than regression. Logistic regression is also
    known in the literature as logit regression, maximum-entropy
    classification (MaxEnt) or the log-linear classifier. In this model,
    the probabilities describing the possible outcomes of a single trial
    are modeled using a logistic function.

    Corresponding estimators are:

    - [LogisticRegression][] for classification tasks.

    Read more in sklearn's [documentation][lrdocs].

    See Also
    --------
    atom.models:GaussianProcess
    atom.models:LinearDiscriminantAnalysis
    atom.models:PassiveAggressive

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="LR", metric="f1", verbose=2)
    ```

    """

    acronym = "LR"
    needs_scaling = True
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn", "sklearnex", "cuml"]

    _module = "linear_model"
    _estimators = CustomDict({"class": "LogisticRegression"})

    def _get_parameters(self, trial: Trial) -> CustomDict:
        """Get the trial's hyperparameters.

        Replaces invalid penalty + solver combinations with the default
        penalty and drops parameters that are unused by the sampled
        penalty.

        Parameters
        ----------
        trial: [Trial][]
            Current trial.

        Returns
        -------
        CustomDict
            Trial's hyperparameters.

        """
        params = super()._get_parameters(trial)

        # Limitations on penalty + solver combinations
        penalty = self._get_param("penalty", params)
        solver = self._get_param("solver", params)
        cond_1 = penalty is None and solver == "liblinear"
        cond_2 = penalty == "l1" and solver not in ("liblinear", "saga")
        cond_3 = penalty == "elasticnet" and solver != "saga"

        if cond_1 or cond_2 or cond_3:
            params.replace_value("penalty", "l2")  # Change to default value

        # l1_ratio is only used by the elasticnet penalty
        if self._get_param("penalty", params) != "elasticnet":
            params.pop("l1_ratio")

        # C is meaningless without regularization
        if self._get_param("penalty", params) is None:
            params.pop("C")

        return params

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict(
            penalty=Cat([None, "l1", "l2", "elasticnet"]),
            C=Float(1e-3, 100, log=True),
            solver=Cat(["lbfgs", "newton-cg", "liblinear", "sag", "saga"]),
            max_iter=Int(100, 1000, step=10),
            l1_ratio=Float(0, 1.0, step=0.1),
        )

        if self._gpu:
            dist.pop("solver")
            dist.pop("penalty")  # Only 'l2' is supported
        elif self.engine["estimator"] == "sklearnex":
            dist["solver"] = Cat(["lbfgs", "newton-cg"])

        return dist
-
-
class MultiLayerPerceptron(ClassRegModel):
    """Multi-layer Perceptron.

    A multi-layer perceptron is a supervised learning algorithm that
    learns a function by training on a dataset. Given a set of features
    and a target, it can fit a non-linear function approximator for
    either classification or regression. It differs from logistic
    regression in that one or more non-linear hidden layers sit between
    the input and the output layer.

    Corresponding estimators are:

    - [MLPClassifier][] for classification tasks.
    - [MLPRegressor][] for regression tasks.

    Read more in sklearn's [documentation][mlpdocs].

    See Also
    --------
    atom.models:PassiveAggressive
    atom.models:Perceptron
    atom.models:StochasticGradientDescent

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="MLP", metric="f1", verbose=2)
    ```

    """

    acronym = "MLP"
    needs_scaling = True
    accepts_sparse = True
    native_multilabel = True
    native_multioutput = False
    has_validation = "max_iter"
    supports_engines = ["sklearn"]

    _module = "neural_network"
    _estimators = CustomDict({"class": "MLPClassifier", "reg": "MLPRegressor"})

    def _get_parameters(self, trial: Trial) -> CustomDict:
        """Get the trial's hyperparameters.

        Parameters
        ----------
        trial: [Trial][]
            Current trial.

        Returns
        -------
        CustomDict
            Trial's hyperparameters.

        """
        params = super()._get_parameters(trial)

        # As soon as one hidden layer has zero neurons, that layer and
        # every deeper layer are dropped from the parameters
        truncate = False
        for name in [p for p in sorted(params) if p.startswith("hidden_layer")]:
            if truncate or params[name] == 0:
                truncate = True
                params.pop(name)

        if self._get_param("solver", params) == "sgd":
            # learning_rate_init is controlled through learning_rate
            params.pop("learning_rate_init")
        else:
            # These schedules only apply to the sgd solver
            params.pop("learning_rate")
            params.pop("power_t")

        return params

    def _trial_to_est(self, params: CustomDict) -> CustomDict:
        """Convert trial's hyperparameters to parameters for the estimator.

        Folds the individual hidden_layer_N parameters into the single
        hidden_layer_sizes tuple expected by the estimator.

        Parameters
        ----------
        params: CustomDict
            Trial's hyperparameters.

        Returns
        -------
        CustomDict
            Estimator's hyperparameters.

        """
        params = super()._trial_to_est(params)

        layer_names = [p for p in sorted(params) if p.startswith("hidden_layer")]
        layers = [params.pop(name) for name in layer_names]

        if layers:
            params.insert(0, "hidden_layer_sizes", tuple(layers))

        return params

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict(
            hidden_layer_1=Int(10, 100),
            hidden_layer_2=Int(0, 100),
            hidden_layer_3=Int(0, 10),
            activation=Cat(["identity", "logistic", "tanh", "relu"]),
            solver=Cat(["lbfgs", "sgd", "adam"]),
            alpha=Float(1e-4, 0.1, log=True),
            batch_size=Cat(["auto", 8, 16, 32, 64, 128, 256]),
            learning_rate=Cat(["constant", "invscaling", "adaptive"]),
            learning_rate_init=Float(1e-3, 0.1, log=True),
            power_t=Float(0.1, 0.9, step=0.1),
            max_iter=Int(50, 500, step=10),
        )

        # When the user fixed the layer sizes, skip the three layer distributions
        if "hidden_layer_sizes" in self._est_params:
            return dist[3:]

        return dist
-
-
class MultinomialNB(ClassRegModel):
    """Multinomial Naive Bayes.

    MultinomialNB implements the Naive Bayes algorithm for multinomially
    distributed data. It's one of the two classic Naive Bayes variants
    used in text classification, where the data are typically
    represented as word vector counts (tf-idf vectors are also known to
    work well in practice).

    Corresponding estimators are:

    - [MultinomialNB][multinomialnbclass] for classification tasks.

    Read more in sklearn's [documentation][mnbdocs].

    See Also
    --------
    atom.models:BernoulliNB
    atom.models:ComplementNB
    atom.models:GaussianNB

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="MNB", metric="f1", verbose=2)
    ```

    """

    acronym = "MNB"
    needs_scaling = False
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn", "cuml"]

    _module = "naive_bayes"
    _estimators = CustomDict({"class": "MultinomialNB"})

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict(
            alpha=Float(0.01, 10, log=True),
            fit_prior=Cat([True, False]),
        )
        return dist
-
-
class OrdinaryLeastSquares(ClassRegModel):
    """Linear Regression.

    Ordinary Least Squares is plain linear regression without any
    regularization. It fits a linear model with coefficients
    `w=(w1, ..., wp)` that minimizes the residual sum of squares
    between the observed targets in the dataset and the targets
    predicted by the linear approximation.

    Corresponding estimators are:

    - [LinearRegression][] for regression tasks.

    Read more in sklearn's [documentation][olsdocs].

    See Also
    --------
    atom.models:ElasticNet
    atom.models:Lasso
    atom.models:Ridge

    Examples
    --------
    ```pycon
    from atom import ATOMRegressor
    from sklearn.datasets import fetch_california_housing

    X, y = fetch_california_housing(return_X_y=True)

    atom = ATOMRegressor(X, y, random_state=1)
    atom.run(models="OLS", metric="r2", verbose=2)
    ```

    """

    acronym = "OLS"
    needs_scaling = True
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn", "sklearnex", "cuml"]

    _module = "linear_model"
    _estimators = CustomDict({"reg": "LinearRegression"})
-
-
class OrthogonalMatchingPursuit(ClassRegModel):
    """Orthogonal Matching Pursuit.

    Orthogonal Matching Pursuit implements the OMP algorithm, which
    approximates the fit of a linear model under a constraint on the
    number of non-zero coefficients.

    Corresponding estimators are:

    - [OrthogonalMatchingPursuit][] for regression tasks.

    Read more in sklearn's [documentation][ompdocs].

    See Also
    --------
    atom.models:Lasso
    atom.models:LeastAngleRegression
    atom.models:OrdinaryLeastSquares

    Examples
    --------
    ```pycon
    from atom import ATOMRegressor
    from sklearn.datasets import fetch_california_housing

    X, y = fetch_california_housing(return_X_y=True)

    atom = ATOMRegressor(X, y, random_state=1)
    atom.run(models="OMP", metric="r2", verbose=2)
    ```

    """

    acronym = "OMP"
    needs_scaling = True
    accepts_sparse = False
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "linear_model"
    _estimators = CustomDict({"reg": "OrthogonalMatchingPursuit"})
-
-
class PassiveAggressive(ClassRegModel):
    """Passive Aggressive.

    The passive-aggressive algorithms are a family of algorithms for
    large-scale learning. They are similar to the Perceptron in that
    they do not require a learning rate. However, contrary to the
    [Perceptron][], they include a regularization parameter `C`.

    Corresponding estimators are:

    - [PassiveAggressiveClassifier][] for classification tasks.
    - [PassiveAggressiveRegressor][] for regression tasks.

    Read more in sklearn's [documentation][padocs].

    See Also
    --------
    atom.models:MultiLayerPerceptron
    atom.models:Perceptron
    atom.models:StochasticGradientDescent

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="PA", metric="f1", verbose=2)
    ```

    """

    acronym = "PA"
    needs_scaling = True
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = "max_iter"
    supports_engines = ["sklearn"]

    _module = "linear_model"
    _estimators = CustomDict(
        {"class": "PassiveAggressiveClassifier", "reg": "PassiveAggressiveRegressor"}
    )

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        The loss options depend on the task: hinge-based losses for
        classification, epsilon-based losses for regression.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        if self.goal == "class":
            loss = ["hinge", "squared_hinge"]
        else:
            loss = ["epsilon_insensitive", "squared_epsilon_insensitive"]

        return CustomDict(
            C=Float(1e-3, 100, log=True),
            max_iter=Int(500, 1500, step=50),
            loss=Cat(loss),
            average=Cat([True, False]),
        )
-
-
class Perceptron(ClassRegModel):
    """Linear Perceptron classification.

    The Perceptron is a simple classification algorithm suited to
    large scale learning. By default:

    * It does not require a learning rate.
    * It is not regularized (penalized).
    * It updates its model only on mistakes.

    Because of the last point, the Perceptron is slightly faster to
    train than [StochasticGradientDescent][] with the hinge loss, and
    the resulting models are sparser.

    Corresponding estimators are:

    - [Perceptron][percclassifier] for classification tasks.

    Read more in sklearn's [documentation][percdocs].

    See Also
    --------
    atom.models:MultiLayerPerceptron
    atom.models:PassiveAggressive
    atom.models:StochasticGradientDescent

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="Perc", metric="f1", verbose=2)
    ```

    """

    acronym = "Perc"
    needs_scaling = True
    accepts_sparse = False
    native_multilabel = False
    native_multioutput = False
    has_validation = "max_iter"
    supports_engines = ["sklearn"]

    _module = "linear_model"
    _estimators = CustomDict({"class": "Perceptron"})

    def _get_parameters(self, trial: Trial) -> CustomDict:
        """Get the trial's hyperparameters.

        Parameters
        ----------
        trial: [Trial][]
            Current trial.

        Returns
        -------
        CustomDict
            Trial's hyperparameters.

        """
        params = super()._get_parameters(trial)

        # l1_ratio is only consumed by the elasticnet penalty
        if self._get_param("penalty", params) == "elasticnet":
            return params

        params.pop("l1_ratio")
        return params

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict(
            penalty=Cat([None, "l2", "l1", "elasticnet"]),
            alpha=Float(1e-4, 10, log=True),
            l1_ratio=Float(0.1, 0.9, step=0.1),
            max_iter=Int(500, 1500, step=50),
            eta0=Float(1e-2, 10, log=True),
        )
        return dist
-
-
class QuadraticDiscriminantAnalysis(ClassRegModel):
    """Quadratic Discriminant Analysis.

    Quadratic Discriminant Analysis is a classifier with a quadratic
    decision boundary, generated by fitting class conditional densities
    to the data and using Bayes’ rule. The model fits a Gaussian
    density to each class, where every class fits its own covariance
    matrix (unlike [LinearDiscriminantAnalysis][], which assumes a
    covariance matrix shared by all classes).

    Corresponding estimators are:

    - [QuadraticDiscriminantAnalysis][qdaclassifier] for classification tasks.

    Read more in sklearn's [documentation][qdadocs].

    See Also
    --------
    atom.models:LinearDiscriminantAnalysis
    atom.models:LogisticRegression
    atom.models:RadiusNearestNeighbors

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="QDA", metric="f1", verbose=2)
    ```

    """

    acronym = "QDA"
    needs_scaling = False
    accepts_sparse = False
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "discriminant_analysis"
    _estimators = CustomDict({"class": "QuadraticDiscriminantAnalysis"})

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        return CustomDict(reg_param=Float(0, 1.0, step=0.1))
-
-
class RadiusNearestNeighbors(ClassRegModel):
    """Radius Nearest Neighbors.

    Radius Nearest Neighbors implements the nearest neighbors vote,
    where the neighbors are selected from within a given radius. For
    regression, the target is predicted by local interpolation of the
    targets associated with the nearest neighbors in the training set.

    !!! warning
        * The `radius` parameter should be tuned to the data at hand or
          the model will perform poorly.
        * If outliers are detected, the estimator raises an exception
          unless `est_params={"outlier_label": "most_frequent"}` is used.

    Corresponding estimators are:

    - [RadiusNeighborsClassifier][] for classification tasks.
    - [RadiusNeighborsRegressor][] for regression tasks.

    Read more in sklearn's [documentation][knndocs].

    See Also
    --------
    atom.models:KNearestNeighbors
    atom.models:LinearDiscriminantAnalysis
    atom.models:QuadraticDiscriminantAnalysis

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(
        models="RNN",
        metric="f1",
        est_params={"outlier_label": "most_frequent"},
        verbose=2,
    )
    ```

    """

    acronym = "RNN"
    needs_scaling = True
    accepts_sparse = True
    native_multilabel = True
    native_multioutput = True
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "neighbors"
    _estimators = CustomDict(
        {"class": "RadiusNeighborsClassifier", "reg": "RadiusNeighborsRegressor"}
    )

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict(
            radius=Float(1e-2, 100),
            weights=Cat(["uniform", "distance"]),
            algorithm=Cat(["auto", "ball_tree", "kd_tree", "brute"]),
            leaf_size=Int(20, 40),
            p=Int(1, 2),
        )
        return dist
-
-
class RandomForest(ClassRegModel):
    """Random Forest.

    Random forests are an ensemble learning method that operate by
    constructing a multitude of decision trees at training time and
    outputting the class that is the mode of the classes
    (classification) or mean prediction (regression) of the individual
    trees. Random forests correct for decision trees' habit of
    overfitting to their training set.

    Corresponding estimators are:

    - [RandomForestClassifier][] for classification tasks.
    - [RandomForestRegressor][] for regression tasks.

    Read more in sklearn's [documentation][rfdocs].

    !!! warning
        cuML's implementation of [RandomForestClassifier][cumlrf] only
        supports predictions on dtype `float32`. Convert all dtypes
        before calling atom's [run][atomclassifier-run] method to avoid
        exceptions.

    See Also
    --------
    atom.models:DecisionTree
    atom.models:ExtraTrees
    atom.models:HistGradientBoosting

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="RF", metric="f1", verbose=2)
    ```

    """

    acronym = "RF"
    needs_scaling = False
    accepts_sparse = True
    native_multilabel = True
    native_multioutput = True
    has_validation = None
    supports_engines = ["sklearn", "sklearnex", "cuml"]

    _module = "ensemble"
    _estimators = CustomDict(
        {"class": "RandomForestClassifier", "reg": "RandomForestRegressor"}
    )

    def _get_parameters(self, trial: Trial) -> CustomDict:
        """Get the trial's hyperparameters.

        Parameters
        ----------
        trial: [Trial][]
            Current trial.

        Returns
        -------
        CustomDict
            Trial's hyperparameters.

        """
        params = super()._get_parameters(trial)

        # max_samples is only used when bootstrapping
        if not self._get_param("bootstrap", params):
            params.pop("max_samples")

        return params

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        The criterion options depend on the task and engine, and some
        parameters are unsupported by the sklearnex/cuml engines.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        if self.goal == "class":
            criterion = ["gini", "entropy"]
        else:
            if self.engine["estimator"] == "cuml":
                criterion = ["mse", "poisson", "gamma", "inverse_gaussian"]
            else:
                criterion = ["squared_error", "absolute_error", "poisson"]

        dist = CustomDict(
            n_estimators=Int(10, 500, step=10),
            criterion=Cat(criterion),
            max_depth=Cat([None, *range(1, 17)]),
            min_samples_split=Int(2, 20),
            min_samples_leaf=Int(1, 20),
            max_features=Cat([None, "sqrt", "log2", 0.5, 0.6, 0.7, 0.8, 0.9]),
            bootstrap=Cat([True, False]),
            max_samples=Cat([None, 0.5, 0.6, 0.7, 0.8, 0.9]),
            ccp_alpha=Float(0, 0.035, step=0.005),
        )

        if self.engine["estimator"] == "sklearnex":
            dist.pop("criterion")
            dist.pop("ccp_alpha")
        elif self.engine["estimator"] == "cuml":
            # cuML uses a different parameter name and disallows None values
            dist.replace_key("criterion", "split_criterion")
            dist["max_depth"] = Int(1, 17)
            dist["max_features"] = Cat(["sqrt", "log2", 0.5, 0.6, 0.7, 0.8, 0.9])
            dist["max_samples"] = Float(0.5, 0.9, step=0.1)
            dist.pop("ccp_alpha")

        return dist
-
-
class Ridge(ClassRegModel):
    """Linear least squares with l2 regularization.

    When used as a classifier, the target values are first converted
    to {-1, 1}, after which the problem is treated as a regression
    task.

    Corresponding estimators are:

    - [RidgeClassifier][] for classification tasks.
    - [Ridge][ridgeregressor] for regression tasks.

    Read more in sklearn's [documentation][ridgedocs].

    !!! warning
        Engines `sklearnex` and `cuml` are only available for regression
        tasks.

    See Also
    --------
    atom.models:BayesianRidge
    atom.models:ElasticNet
    atom.models:Lasso

    Examples
    --------
    ```pycon
    from atom import ATOMRegressor
    from sklearn.datasets import fetch_california_housing

    X, y = fetch_california_housing(return_X_y=True)

    atom = ATOMRegressor(X, y, random_state=1)
    atom.run(models="Ridge", metric="r2", verbose=2)
    ```

    """

    acronym = "Ridge"
    needs_scaling = True
    accepts_sparse = True
    native_multilabel = True
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn", "sklearnex", "cuml"]

    _module = "linear_model"
    _estimators = CustomDict({"class": "RidgeClassifier", "reg": "Ridge"})

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict(
            alpha=Float(1e-3, 10, log=True),
            solver=Cat(["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]),
        )

        if self.goal == "reg":
            engine = self.engine["estimator"]
            if engine == "sklearnex":
                dist.pop("solver")  # Only supports 'auto'
            elif engine == "cuml":
                dist["solver"] = Cat(["eig", "svd", "cd"])

        return dist
-
-
class StochasticGradientDescent(ClassRegModel):
    """Stochastic Gradient Descent.

    Stochastic Gradient Descent is a simple yet very efficient approach
    to fitting linear classifiers and regressors under convex loss
    functions. Although SGD has been around in the machine learning
    community for a long time, it has recently received considerable
    attention in the context of large-scale learning.

    Corresponding estimators are:

    - [SGDClassifier][] for classification tasks.
    - [SGDRegressor][] for regression tasks.

    Read more in sklearn's [documentation][sgddocs].

    See Also
    --------
    atom.models:MultiLayerPerceptron
    atom.models:PassiveAggressive
    atom.models:SupportVectorMachine

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="SGD", metric="f1", verbose=2)
    ```

    """

    acronym = "SGD"
    needs_scaling = True
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = "max_iter"
    supports_engines = ["sklearn"]

    _module = "linear_model"
    _estimators = CustomDict({"class": "SGDClassifier", "reg": "SGDRegressor"})

    def _get_parameters(self, trial: Trial) -> CustomDict:
        """Get the trial's hyperparameters.

        Parameters
        ----------
        trial: [Trial][]
            Current trial.

        Returns
        -------
        CustomDict
            Trial's hyperparameters.

        """
        params = super()._get_parameters(trial)

        # l1_ratio only applies to the elasticnet penalty
        penalty = self._get_param("penalty", params)
        if penalty != "elasticnet":
            params.pop("l1_ratio")

        # eta0 is ignored by the 'optimal' learning rate schedule
        schedule = self._get_param("learning_rate", params)
        if schedule == "optimal":
            params.pop("eta0")

        return params

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        # Classification may use any loss; regression only the last four
        loss = [
            "hinge",
            "log_loss",
            "modified_huber",
            "squared_hinge",
            "perceptron",
            "squared_error",
            "huber",
            "epsilon_insensitive",
            "squared_epsilon_insensitive",
        ]
        if self.goal != "class":
            loss = loss[-4:]

        return CustomDict(
            loss=Cat(loss),
            penalty=Cat([None, "l1", "l2", "elasticnet"]),
            alpha=Float(1e-4, 1.0, log=True),
            l1_ratio=Float(0.1, 0.9, step=0.1),
            max_iter=Int(500, 1500, step=50),
            epsilon=Float(1e-4, 1.0, log=True),
            learning_rate=Cat(["constant", "invscaling", "optimal", "adaptive"]),
            eta0=Float(1e-2, 10, log=True),
            power_t=Float(0.1, 0.9, step=0.1),
            average=Cat([True, False]),
        )
-
-
class SupportVectorMachine(ClassRegModel):
    """Support Vector Machine.

    The implementation of the Support Vector Machine is based on libsvm.
    The fit time scales at least quadratically with the number of
    samples and may be impractical beyond tens of thousands of samples.
    For large datasets consider using a [LinearSVM][] or a
    [StochasticGradientDescent][] model instead.

    Corresponding estimators are:

    - [SVC][] for classification tasks.
    - [SVR][] for regression tasks.

    Read more in sklearn's [documentation][svmdocs].

    See Also
    --------
    atom.models:LinearSVM
    atom.models:MultiLayerPerceptron
    atom.models:StochasticGradientDescent

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="SVM", metric="f1", verbose=2)
    ```

    """

    acronym = "SVM"
    needs_scaling = True
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn", "sklearnex", "cuml"]

    _module = "svm"
    _estimators = CustomDict({"class": "SVC", "reg": "SVR"})

    def _get_parameters(self, trial: Trial) -> CustomDict:
        """Get the trial's hyperparameters.

        Drops parameters that are unused by the sampled kernel and
        avoids known-invalid combinations.

        Parameters
        ----------
        trial: [Trial][]
            Current trial.

        Returns
        -------
        CustomDict
            Trial's hyperparameters.

        """
        params = super()._get_parameters(trial)

        if self.goal == "class":
            # epsilon only exists for SVR
            params.pop("epsilon")

        kernel = self._get_param("kernel", params)
        if kernel == "poly":
            params.replace_value("gamma", "scale")  # Crashes in combination with "auto"
        else:
            # degree is only used by the poly kernel
            params.pop("degree")

        if kernel not in ("rbf", "poly", "sigmoid"):
            params.pop("gamma")

        if kernel not in ("poly", "sigmoid"):
            params.pop("coef0")

        return params

    def _get_est(self, **params) -> PREDICTOR:
        """Get the model's estimator with unpacked parameters.

        Parameters
        ----------
        **params
            Unpacked hyperparameters for the estimator.

        Returns
        -------
        Predictor
            Estimator instance.

        """
        if self.engine["estimator"] == "cuml" and self.goal == "class":
            # cuML's SVC needs probability=True for predict_proba support
            return self._est_class(
                probability=params.pop("probability", True),
                random_state=params.pop("random_state", self.random_state),
                **params)
        else:
            return super()._get_est(**params)

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict(
            C=Float(1e-3, 100, log=True),
            kernel=Cat(["linear", "poly", "rbf", "sigmoid"]),
            degree=Int(2, 5),
            gamma=Cat(["scale", "auto"]),
            coef0=Float(-1.0, 1.0),
            epsilon=Float(1e-3, 100, log=True),
            shrinking=Cat([True, False]),
        )

        if self.engine["estimator"] == "cuml":
            # Not supported by cuML's implementation
            dist.pop("epsilon")
            dist.pop("shrinking")

        return dist
-
-
-class XGBoost(ClassRegModel):
- """Extreme Gradient Boosting.
-
- XGBoost is an optimized distributed gradient boosting model
- designed to be highly efficient, flexible and portable. XGBoost
- provides a parallel tree boosting that solve many data science
- problems in a fast and accurate way.
-
- Corresponding estimators are:
-
- - [XGBClassifier][] for classification tasks.
- - [XGBRegressor][] for regression tasks.
-
- Read more in XGBoost's [documentation][xgbdocs].
-
- See Also
- --------
- atom.models:CatBoost
- atom.models:GradientBoostingMachine
- atom.models:LightGBM
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(models="XGB", metric="f1", verbose=2)
- ```
-
- """
-
- acronym = "XGB"
- needs_scaling = True
- accepts_sparse = True
- native_multilabel = False
- native_multioutput = False
- has_validation = "n_estimators"
- supports_engines = ["xgboost"]
-
- _module = "xgboost"
- _estimators = CustomDict({"class": "XGBClassifier", "reg": "XGBRegressor"})
-
- def _get_est(self, **params) -> PREDICTOR:
- """Get the model's estimator with unpacked parameters.
-
- Returns
- -------
- Predictor
- Estimator instance.
-
- """
- eval_metric = None
- if getattr(self, "_metric", None):
- eval_metric = XGBMetric(self._metric[0], task=self.task)
-
- return self._est_class(
- eval_metric=params.pop("eval_metric", eval_metric),
- n_jobs=params.pop("n_jobs", self.n_jobs),
- tree_method=params.pop("tree_method", "gpu_hist" if self._gpu else None),
- gpu_id=self._device_id,
- verbosity=params.pop("verbosity", 0),
- random_state=params.pop("random_state", self.random_state),
- **params,
- )
-
- def _fit_estimator(
- self,
- estimator: PREDICTOR,
- data: tuple[DATAFRAME, SERIES],
- est_params_fit: dict,
- validation: tuple[DATAFRAME, SERIES] | None = None,
- trial: Trial | None = None,
- ):
- """Fit the estimator and perform in-training validation.
-
- Parameters
- ----------
- estimator: Predictor
- Instance to fit.
-
- data: tuple
- Training data of the form (X, y).
-
- est_params_fit: dict
- Additional parameters for the estimator's fit method.
-
- validation: tuple or None
- Validation data of the form (X, y). If None, no validation
- is performed.
-
- trial: [Trial][] or None
- Active trial (during hyperparameter tuning).
-
- Returns
- -------
- Predictor
- Fitted instance.
-
- """
- m = self._metric[0].name
- params = est_params_fit.copy()
-
- callbacks = params.pop("callbacks", [])
- if trial and len(self._metric) == 1:
- callbacks.append(XGBoostPruningCallback(trial, f"validation_1-{m}"))
-
- try:
- estimator.set_params(callbacks=callbacks)
- estimator.fit(
- *data,
- eval_set=[data, validation] if validation else None,
- verbose=params.get("verbose", False),
- **params,
- )
- except TrialPruned as ex:
- # Add the pruned step to the output
- step = str(ex).split(" ")[-1][:-1]
- steps = estimator.get_params()[self.has_validation]
- trial.params[self.has_validation] = f"{step}/{steps}"
-
- trial.set_user_attr("estimator", estimator)
- raise ex
-
- if validation:
- # Create evals attribute with train and validation scores
- # Negative because minimizes the function
- results = estimator.evals_result()
- self._evals[f"{m}_train"] = np.negative(results["validation_0"][m])
- self._evals[f"{m}_test"] = np.negative(results["validation_1"][m])
-
- return estimator
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- n_estimators=Int(20, 500, step=10),
- learning_rate=Float(0.01, 1.0, log=True),
- max_depth=Int(1, 20),
- gamma=Float(0, 1.0),
- min_child_weight=Int(1, 10),
- subsample=Float(0.5, 1.0, step=0.1),
- colsample_bytree=Float(0.4, 1.0, step=0.1),
- reg_alpha=Float(1e-4, 100, log=True),
- reg_lambda=Float(1e-4, 100, log=True),
- )
-
-
-# Time series ====================================================== >>
-
-class ARIMA(ForecastModel):
- """Autoregressive Integrated Moving Average Model.
-
- Seasonal ARIMA models and exogeneous input is supported, hence this
- estimator is capable of fitting SARIMA, ARIMAX, and SARIMAX.
-
- An ARIMA model, is a generalization of an autoregressive moving
- average (ARMA) model, and is fitted to time-series data in an effort
- to forecast future points. ARIMA models can be especially
- efficacious in cases where data shows evidence of non-stationarity.
-
- The "AR" part of ARIMA indicates that the evolving variable of
- interest is regressed on its own lagged (i.e., prior observed)
- values. The "MA" part indicates that the regression error is
- actually a linear combination of error terms whose values occurred
- contemporaneously and at various times in the past. The "I" (for
- "integrated") indicates that the data values have been replaced with
- the difference between their values and the previous values (and this
- differencing process may have been performed more than once).
-
- Corresponding estimators are:
-
- - [ARIMA][arimaclass] for forecasting tasks.
-
- !!! warning
- ARIMA often runs into numerical errors when optimizing the
- hyperparameters. Possible solutions are:
-
- - Use the [AutoARIMA][] model instead.
- - Use [`est_params`][directforecaster-est_params] to specify the
- orders manually, e.g. `#!python atom.run("arima", n_trials=5,
- est_params={"order": (1, 1, 0)})`.
- - Use the `catch` parameter in [`ht_params`][directforecaster-ht_params]
- to avoid raising every exception, e.g. `#!python atom.run("arima",
- n_trials=5, ht_params={"catch": (Exception,)})`.
-
- See Also
- --------
- atom.models:AutoARIMA
-
- Examples
- --------
- ```pycon
- from atom import ATOMForecaster
- from sktime.datasets import load_longley
-
- _, X = load_longley()
-
- atom = ATOMForecaster(X)
- atom.run(models="ARIMA", verbose=2)
- ```
-
- """
-
- acronym = "ARIMA"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = True
- has_validation = None
- supports_engines = ["sktime"]
-
- _module = "sktime.forecasting.arima"
- _estimators = CustomDict({"fc": "ARIMA"})
-
- _order = ("p", "d", "q")
- _sorder = ("Ps", "Ds", "Qs", "S")
-
- def _get_parameters(self, trial: Trial) -> CustomDict:
- """Get the trial's hyperparameters.
-
- Parameters
- ----------
- trial: [Trial][]
- Current trial.
-
- Returns
- -------
- CustomDict
- Trial's hyperparameters.
-
- """
- params = super()._get_parameters(trial)
-
- # If no seasonal periodicity, set seasonal components to zero
- if self._get_param("S", params) == 0:
- for p in self._sorder:
- params.replace_value(p, 0)
-
- return params
-
- def _trial_to_est(self, params: CustomDict) -> CustomDict:
- """Convert trial's hyperparameters to parameters for the estimator.
-
- Parameters
- ----------
- params: CustomDict
- Trial's hyperparameters.
-
- Returns
- -------
- CustomDict
- Estimator's hyperparameters.
-
- """
- params = super()._trial_to_est(params)
-
- # Convert params to hyperparameters order and seasonal_order
- if all(p in params for p in self._sorder):
- params.insert(0, "seasonal_order", tuple(params.pop(p) for p in self._sorder))
- if all(p in params for p in self._order):
- params.insert(0, "order", tuple(params.pop(p) for p in self._order))
-
- return params
-
- def _get_distributions(self) -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- methods = ["newton", "nm", "bfgs", "lbfgs", "powell", "cg", "ncg", "basinhopping"]
-
- dist = CustomDict(
- p=Int(0, 2),
- d=Int(0, 1),
- q=Int(0, 2),
- Ps=Int(0, 2),
- Ds=Int(0, 1),
- Qs=Int(0, 2),
- S=Cat([0, 4, 6, 7, 12]),
- method=Cat(methods),
- maxiter=Int(50, 200, step=10),
- with_intercept=Cat([True, False]),
- )
-
- # Drop order and seasonal_order params if specified by user
- if "order" in self._est_params:
- for p in self._order:
- dist.pop(p)
- if "seasonal_order" in self._est_params:
- for p in self._sorder:
- dist.pop(p)
-
- return dist
-
-
-class AutoARIMA(ForecastModel):
- """Automatic Autoregressive Integrated Moving Average Model.
-
- [ARIMA][] implementation that includes automated fitting of
- (S)ARIMA(X) hyperparameters (p, d, q, P, D, Q). The AutoARIMA
- algorithm seeks to identify the most optimal parameters for an
- ARIMA model, settling on a single fitted ARIMA model. This process
- is based on the commonly-used R function.
-
- AutoARIMA works by conducting differencing tests (i.e.,
- Kwiatkowski–Phillips–Schmidt–Shin, Augmented Dickey-Fuller or
- Phillips–Perron) to determine the order of differencing, d, and
- then fitting models within defined ranges. AutoARIMA also seeks
- to identify the optimal P and Q hyperparameters after conducting
- the Canova-Hansen to determine the optimal order of seasonal
- differencing.
-
- Note that due to stationarity issues, AutoARIMA might not find a
- suitable model that will converge. If this is the case, a ValueError
- is thrown suggesting stationarity-inducing measures be taken prior
- to re-fitting or that a new range of order values be selected.
-
- Corresponding estimators are:
-
- - [AutoARIMA][autoarimaclass] for forecasting tasks.
-
- See Also
- --------
- atom.models:ARIMA
- atom.models:ETS
-
- Examples
- --------
- ```pycon
- from atom import ATOMForecaster
- from sktime.datasets import load_longley
-
- _, X = load_longley()
-
- atom = ATOMForecaster(X, random_state=1)
- atom.run(models="autoarima", verbose=2)
- ```
-
- """
-
- acronym = "AutoARIMA"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = True
- has_validation = None
- supports_engines = ["sktime"]
-
- _module = "sktime.forecasting.arima"
- _estimators = CustomDict({"fc": "AutoARIMA"})
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- methods = ["newton", "nm", "bfgs", "lbfgs", "powell", "cg", "ncg", "basinhopping"]
-
- return CustomDict(
- method=Cat(methods),
- maxiter=Int(50, 200, step=10),
- with_intercept=Cat([True, False]),
- )
-
-
-class ExponentialSmoothing(ForecastModel):
- """Exponential Smoothing forecaster.
-
- Holt-Winters exponential smoothing forecaster. The default settings
- use simple exponential smoothing, without trend and seasonality
- components.
-
- Corresponding estimators are:
-
- - [ExponentialSmoothing][esclass] for forecasting tasks.
-
- See Also
- --------
- atom.models:ARIMA
- atom.models:ETS
- atom.models:PolynomialTrend
-
- Examples
- --------
- ```pycon
- from atom import ATOMForecaster
- from sktime.datasets import load_airline
-
- y = load_airline()
-
- atom = ATOMForecaster(y, random_state=1)
- atom.run(models="ES", verbose=2)
- ```
-
- """
-
- acronym = "ES"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = True
- has_validation = None
- supports_engines = ["sktime"]
-
- _module = "sktime.forecasting.exp_smoothing"
- _estimators = CustomDict({"fc": "ExponentialSmoothing"})
-
- def _get_parameters(self, trial: Trial) -> CustomDict:
- """Get the trial's hyperparameters.
-
- Parameters
- ----------
- trial: [Trial][]
- Current trial.
-
- Returns
- -------
- CustomDict
- Trial's hyperparameters.
-
- """
- params = super()._get_parameters(trial)
-
- if self._get_param("trend", params) is None:
- params.pop("damped_trend")
-
- if self._get_param("sp", params) is None:
- params.pop("seasonal")
-
- return params
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- methods = ["L-BFGS-B", "TNC", "SLSQP", "Powell", "trust-constr", "bh", "ls"]
-
- return CustomDict(
- trend=Cat(["add", "mul", None]),
- damped_trend=Cat([True, False]),
- seasonal=Cat(["add", "mul", None]),
- sp=Cat([4, 6, 7, 12, None]),
- use_boxcox=Cat([True, False]),
- initialization_method=Cat(["estimated", "heuristic"]),
- method=Cat(methods),
- )
-
-
-class ETS(ForecastModel):
- """ETS model with automatic fitting capabilities.
-
- The ETS models are a family of time series models with an
- underlying state space model consisting of a level component,
- a trend component (T), a seasonal component (S), and an error
- term (E).
-
- Corresponding estimators are:
-
- - [AutoETS][] for forecasting tasks.
-
- See Also
- --------
- atom.models:ARIMA
- atom.models:ExponentialSmoothing
- atom.models:PolynomialTrend
-
- Examples
- --------
- ```pycon
- from atom import ATOMForecaster
- from sktime.datasets import load_airline
-
- y = load_airline()
-
- atom = ATOMForecaster(y, random_state=1)
- atom.run(models="ETS", verbose=2)
-
- ```
-
- """
-
- acronym = "ETS"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = True
- has_validation = None
- supports_engines = ["sktime"]
-
- _module = "sktime.forecasting.ets"
- _estimators = CustomDict({"fc": "AutoETS"})
-
- def _get_parameters(self, trial: Trial) -> CustomDict:
- """Get the trial's hyperparameters.
-
- Parameters
- ----------
- trial: [Trial][]
- Current trial.
-
- Returns
- -------
- CustomDict
- Trial's hyperparameters.
-
- """
- params = super()._get_parameters(trial)
-
- # If no seasonal periodicity, set seasonal components to zero
- if self._get_param("sp", params) == 1:
- params.pop("seasonal")
-
- return params
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- error=Cat(["add", "mul"]),
- trend=Cat(["add", "mul", None]),
- damped_trend=Cat([True, False]),
- seasonal=Cat(["add", "mul", None]),
- sp=Cat([1, 4, 6, 7, 12]),
- initialization_method=Cat(["estimated", "heuristic"]),
- maxiter=Int(500, 2000, step=100),
- auto=Cat([True, False]),
- information_criterion=Cat(["aic", "bic", "aicc"]),
- )
-
-
-class NaiveForecaster(ForecastModel):
- """Naive Forecaster.
-
- NaiveForecaster is a dummy forecaster that makes forecasts using
- simple strategies based on naive assumptions about past trends
- continuing. When used in [multivariate][] tasks, each column is
- forecasted with the same strategy.
-
- Corresponding estimators are:
-
- - [NaiveForecaster][naiveforecasterclass] for forecasting tasks.
-
- See Also
- --------
- atom.models:ExponentialSmoothing
- atom.models:Dummy
- atom.models:PolynomialTrend
-
- Examples
- --------
- ```pycon
- from atom import ATOMForecaster
- from sktime.datasets import load_airline
-
- y = load_airline()
-
- atom = ATOMForecaster(y, random_state=1)
- atom.run(models="NF", verbose=2)
-
- ```
-
- """
-
- acronym = "NF"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = True
- has_validation = None
- supports_engines = ["sktime"]
-
- _module = "sktime.forecasting.naive"
- _estimators = CustomDict({"fc": "NaiveForecaster"})
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(strategy=Cat(["last", "mean", "drift"]))
-
-
-class PolynomialTrend(ForecastModel):
- """Polynomial Trend forecaster.
-
- Forecast time series data with a polynomial trend, using a sklearn
- [LinearRegression][] class to regress values of time series on
- index, after extraction of polynomial features.
-
- Corresponding estimators are:
-
- - [PolynomialTrendForecaster][] for forecasting tasks.
-
- See Also
- --------
- atom.models:ARIMA
- atom.models:ETS
- atom.models:NaiveForecaster
-
- Examples
- --------
- ```pycon
- from atom import ATOMForecaster
- from sktime.datasets import load_airline
-
- y = load_airline()
-
- atom = ATOMForecaster(y, random_state=1)
- atom.run(models="PT", verbose=2)
- ```
-
- """
-
- acronym = "PT"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = True
- has_validation = None
- supports_engines = ["sktime"]
-
- _module = "sktime.forecasting.trend"
- _estimators = CustomDict({"fc": "PolynomialTrendForecaster"})
-
- @staticmethod
- def _get_distributions() -> CustomDict:
- """Get the predefined hyperparameter distributions.
-
- Returns
- -------
- CustomDict
- Hyperparameter distributions.
-
- """
- return CustomDict(
- degree=Int(1, 5),
- with_intercept=Cat([True, False]),
- )
-
-
-# Ensembles ======================================================== >>
-
-class Stacking(ClassRegModel):
- """Stacking ensemble.
-
- Parameters
- ----------
- models: ClassMap
- Models from which to build the ensemble.
-
- **kwargs
- Additional keyword arguments for the estimator.
-
- """
-
- acronym = "Stack"
- needs_scaling = False
- has_validation = None
- native_multilabel = False
- native_multioutput = False
- supports_engines = []
-
- _module = "atom.ensembles"
- _estimators = CustomDict({"class": "StackingClassifier", "reg": "StackingRegressor"})
-
- def __init__(self, models: ClassMap, **kwargs):
- self._models = models
- kw_model = {k: v for k, v in kwargs.items() if k in sign(ClassRegModel.__init__)}
- super().__init__(**kw_model)
- self._est_params = {k: v for k, v in kwargs.items() if k not in kw_model}
-
- def _get_est(self, **params) -> PREDICTOR:
- """Get the model's estimator with unpacked parameters.
-
- Returns
- -------
- Predictor
- Estimator instance.
-
- """
- estimators = []
- for m in self._models:
- if m.scaler:
- name = f"pipeline_{m.name}"
- est = Pipeline([("scaler", m.scaler), (m.name, m.estimator)])
- else:
- name = m.name
- est = m.estimator
-
- estimators.append((name, est))
-
- return self._est_class(
- estimators=estimators,
- n_jobs=params.pop("n_jobs", self.n_jobs),
- **params,
- )
-
-
-class Voting(ClassRegModel):
- """Voting ensemble.
-
- Parameters
- ----------
- models: ClassMap
- Models from which to build the ensemble.
-
- **kwargs
- Additional keyword arguments for the estimator.
-
- """
-
- acronym = "Vote"
- needs_scaling = False
- has_validation = None
- native_multilabel = False
- native_multioutput = False
- supports_engines = []
-
- _module = "atom.ensembles"
- _estimators = CustomDict({"class": "VotingClassifier", "reg": "VotingRegressor"})
-
- def __init__(self, models: ClassMap, **kwargs):
- self._models = models
- kw_model = {k: v for k, v in kwargs.items() if k in sign(ClassRegModel.__init__)}
- super().__init__(**kw_model)
- self._est_params = {k: v for k, v in kwargs.items() if k not in kw_model}
-
- if self._est_params.get("voting") == "soft":
- for m in self._models:
- if not hasattr(m.estimator, "predict_proba"):
- raise ValueError(
- "Invalid value for the voting parameter. If "
- "'soft', all models in the ensemble should have "
- f"a predict_proba method, got {m._fullname}."
- )
-
- def _get_est(self, **params) -> PREDICTOR:
- """Get the model's estimator with unpacked parameters.
-
- Returns
- -------
- Predictor
- Estimator instance.
-
- """
- estimators = []
- for m in self._models:
- if m.scaler:
- name = f"pipeline_{m.name}"
- est = Pipeline([("scaler", m.scaler), (m.name, m.estimator)])
- else:
- name = m.name
- est = m.estimator
-
- estimators.append((name, est))
-
- return self._est_class(
- estimators=estimators,
- n_jobs=params.pop("n_jobs", self.n_jobs),
- **params,
- )
-
-
-# Variables ======================================================== >>
-
-# Available models
-MODELS = ClassMap(
- AdaBoost,
- ARIMA,
- AutoARIMA,
- AutomaticRelevanceDetermination,
- Bagging,
- BayesianRidge,
- BernoulliNB,
- CatBoost,
- CategoricalNB,
- ComplementNB,
- DecisionTree,
- Dummy,
- ElasticNet,
- ETS,
- ExponentialSmoothing,
- ExtraTree,
- ExtraTrees,
- GaussianNB,
- GaussianProcess,
- GradientBoostingMachine,
- HuberRegression,
- HistGradientBoosting,
- KNearestNeighbors,
- Lasso,
- LeastAngleRegression,
- LightGBM,
- LinearDiscriminantAnalysis,
- LinearSVM,
- LogisticRegression,
- MultiLayerPerceptron,
- MultinomialNB,
- NaiveForecaster,
- OrdinaryLeastSquares,
- OrthogonalMatchingPursuit,
- PassiveAggressive,
- Perceptron,
- PolynomialTrend,
- QuadraticDiscriminantAnalysis,
- RadiusNearestNeighbors,
- RandomForest,
- Ridge,
- StochasticGradientDescent,
- SupportVectorMachine,
- XGBoost,
- key="acronym",
-)
-
-# Available ensembles
-ENSEMBLES = ClassMap(Stacking, Voting, key="acronym")
-
-# Available models + ensembles
-MODELS_ENSEMBLES = ClassMap(*MODELS, *ENSEMBLES, key="acronym")
+# -*- coding: utf-8 -*-
+
+"""
+Automated Tool for Optimized Modelling (ATOM)
+Author: Mavs
+Description: Module containing classification and regression models.
+
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from optuna.distributions import CategoricalDistribution as Cat
+from optuna.distributions import FloatDistribution as Float
+from optuna.distributions import IntDistribution as Int
+from optuna.exceptions import TrialPruned
+from optuna.integration import (
+ CatBoostPruningCallback, LightGBMPruningCallback, XGBoostPruningCallback,
+)
+from optuna.trial import Trial
+
+from atom.basemodel import ClassRegModel
+from atom.utils.types import DATAFRAME, PANDAS, PREDICTOR
+from atom.utils.utils import CatBMetric, CustomDict, LGBMetric, XGBMetric
+
+
class AdaBoost(ClassRegModel):
    """Adaptive Boosting (with decision tree as base estimator).

    AdaBoost is a meta-estimator that begins by fitting a
    classifier/regressor on the original dataset and then fits
    additional copies of the algorithm on the same dataset but where
    the weights of instances are adjusted according to the error of
    the current prediction.

    Corresponding estimators are:

    - [AdaBoostClassifier][] for classification tasks.
    - [AdaBoostRegressor][] for regression tasks.

    Read more in sklearn's [documentation][adabdocs].

    See Also
    --------
    atom.models:GradientBoostingMachine
    atom.models:RandomForest
    atom.models:XGBoost

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="AdaB", metric="f1", verbose=2)
    ```

    """

    acronym = "AdaB"
    needs_scaling = False
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "ensemble"
    _estimators = CustomDict({"class": "AdaBoostClassifier", "reg": "AdaBoostRegressor"})

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        # One extra hyperparameter depends on the task: classification
        # tunes the boosting algorithm, regression tunes the loss
        if self.goal == "class":
            key, value = "algorithm", Cat(["SAMME.R", "SAMME"])
        else:
            key, value = "loss", Cat(["linear", "square", "exponential"])

        dist = CustomDict(
            n_estimators=Int(50, 500, step=10),
            learning_rate=Float(0.01, 10, log=True),
        )
        dist[key] = value

        return dist
+
+
class AutomaticRelevanceDetermination(ClassRegModel):
    """Automatic Relevance Determination.

    Automatic Relevance Determination is very similar to
    [BayesianRidge][], but can lead to sparser coefficients. Fit the
    weights of a regression model, using an ARD prior. The weights of
    the regression model are assumed to be in Gaussian distributions.

    Corresponding estimators are:

    - [ARDRegression][] for regression tasks.

    Read more in sklearn's [documentation][arddocs].

    See Also
    --------
    atom.models:BayesianRidge
    atom.models:GaussianProcess
    atom.models:LeastAngleRegression

    Examples
    --------
    ```pycon
    from atom import ATOMRegressor
    from sklearn.datasets import fetch_california_housing

    X, y = fetch_california_housing(return_X_y=True)

    atom = ATOMRegressor(X, y, random_state=1)
    atom.run(models="ARD", metric="r2", verbose=2)
    ```

    """

    acronym = "ARD"
    needs_scaling = True
    accepts_sparse = False
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "linear_model"
    _estimators = CustomDict({"reg": "ARDRegression"})

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        # The four gamma-prior hyperparameters share one log-uniform space
        priors = {
            name: Float(1e-4, 1, log=True)
            for name in ("alpha_1", "alpha_2", "lambda_1", "lambda_2")
        }

        return CustomDict(n_iter=Int(100, 1000, step=10), **priors)
+
+
class Bagging(ClassRegModel):
    """Bagging model (with decision tree as base estimator).

    Bagging uses an ensemble meta-estimator that fits base predictors
    on random subsets of the original dataset and then aggregate their
    individual predictions (either by voting or by averaging) to form a
    final prediction. Such a meta-estimator can typically be used as a
    way to reduce the variance of a black-box estimator by introducing
    randomization into its construction procedure and then making an
    ensemble out of it.

    Corresponding estimators are:

    - [BaggingClassifier][] for classification tasks.
    - [BaggingRegressor][] for regression tasks.

    Read more in sklearn's [documentation][bagdocs].

    See Also
    --------
    atom.models:DecisionTree
    atom.models:LogisticRegression
    atom.models:RandomForest

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="Bag", metric="f1", verbose=2)
    ```

    """

    acronym = "Bag"
    needs_scaling = False
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "ensemble"
    _estimators = CustomDict({"class": "BaggingClassifier", "reg": "BaggingRegressor"})

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        space = {
            "n_estimators": Int(10, 500, step=10),
            "max_samples": Float(0.5, 1.0, step=0.1),
            "max_features": Float(0.5, 1.0, step=0.1),
            "bootstrap": Cat([True, False]),
            "bootstrap_features": Cat([True, False]),
        }

        return CustomDict(**space)
+
+
class BayesianRidge(ClassRegModel):
    """Bayesian ridge regression.

    Bayesian regression techniques can be used to include regularization
    parameters in the estimation procedure: the regularization parameter
    is not set in a hard sense but tuned to the data at hand.

    Corresponding estimators are:

    - [BayesianRidge][bayesianridgeclass] for regression tasks.

    Read more in sklearn's [documentation][brdocs].

    See Also
    --------
    atom.models:AutomaticRelevanceDetermination
    atom.models:GaussianProcess
    atom.models:LeastAngleRegression

    Examples
    --------
    ```pycon
    from atom import ATOMRegressor
    from sklearn.datasets import fetch_california_housing

    X, y = fetch_california_housing(return_X_y=True)

    atom = ATOMRegressor(X, y, random_state=1)
    atom.run(models="BR", metric="r2", verbose=2)
    ```

    """

    acronym = "BR"
    needs_scaling = True
    accepts_sparse = False
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "linear_model"
    _estimators = CustomDict({"reg": "BayesianRidge"})

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict(n_iter=Int(100, 1000, step=10))

        # All four gamma priors are searched over the same log-uniform range
        for prior in ("alpha_1", "alpha_2", "lambda_1", "lambda_2"):
            dist[prior] = Float(1e-4, 1, log=True)

        return dist
+
+
class BernoulliNB(ClassRegModel):
    """Bernoulli Naive Bayes.

    BernoulliNB implements the Naive Bayes algorithm for multivariate
    Bernoulli models. Like [MultinomialNB][], this classifier is
    suitable for discrete data. The difference is that while MNB works
    with occurrence counts, BNB is designed for binary/boolean features.

    Corresponding estimators are:

    - [BernoulliNB][bernoullinbclass] for classification tasks.

    Read more in sklearn's [documentation][bnbdocs].

    See Also
    --------
    atom.models:ComplementNB
    atom.models:CategoricalNB
    atom.models:MultinomialNB

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="BNB", metric="f1", verbose=2)
    ```

    """

    acronym = "BNB"
    needs_scaling = False
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn", "cuml"]

    _module = "naive_bayes"
    _estimators = CustomDict({"class": "BernoulliNB"})

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        alpha = Float(0.01, 10, log=True)
        fit_prior = Cat([True, False])

        return CustomDict(alpha=alpha, fit_prior=fit_prior)
+
+
class CatBoost(ClassRegModel):
    """Cat Boosting Machine.

    CatBoost is a machine learning method based on gradient boosting
    over decision trees. Main advantages of CatBoost:

    - Superior quality when compared with other GBDT models on many
      datasets.
    - Best in class prediction speed.

    Corresponding estimators are:

    - [CatBoostClassifier][] for classification tasks.
    - [CatBoostRegressor][] for regression tasks.

    Read more in CatBoost's [documentation][catbdocs].

    !!! warning
        * CatBoost selects the weights achieved by the best evaluation
          on the test set after training. This means that, by default,
          there is some minor data leakage in the test set. Use the
          `use_best_model=False` parameter to avoid this behavior or use
          a [holdout set][data-sets] to evaluate the final estimator.
        * [In-training validation][] and [pruning][] are disabled when
          `#!python device="gpu"`.

    !!! note
        ATOM uses CatBoost's `n_estimators` parameter instead of
        `iterations` to indicate the number of trees to fit. This is
        done to have consistent naming with the [XGBoost][] and
        [LightGBM][] models.

    See Also
    --------
    atom.models:GradientBoostingMachine
    atom.models:LightGBM
    atom.models:XGBoost

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="CatB", metric="f1", verbose=2)
    ```

    """

    acronym = "CatB"
    needs_scaling = True
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = "n_estimators"
    supports_engines = ["catboost"]

    _module = "catboost"
    _estimators = CustomDict({"class": "CatBoostClassifier", "reg": "CatBoostRegressor"})

    def _get_parameters(self, trial: Trial) -> CustomDict:
        """Get the trial's hyperparameters.

        Parameters
        ----------
        trial: [Trial][]
            Current trial.

        Returns
        -------
        CustomDict
            Trial's hyperparameters.

        """
        params = super()._get_parameters(trial)

        # bagging_temperature is only valid for the Bayesian bootstrap
        # and subsample only for the Bernoulli bootstrap, so drop the
        # one that doesn't apply to the sampled bootstrap_type
        if self._get_param("bootstrap_type", params) == "Bernoulli":
            params.pop("bagging_temperature")
        elif self._get_param("bootstrap_type", params) == "Bayesian":
            params.pop("subsample")

        return params

    def _get_est(self, **params) -> PREDICTOR:
        """Get the estimator instance.

        Parameters
        ----------
        **params
            Unpacked hyperparameters for the estimator.

        Returns
        -------
        Predictor
            Estimator instance.

        """
        # Custom eval metrics are only used on CPU (disabled on GPU)
        eval_metric = None
        if getattr(self, "_metric", None) and not self._gpu:
            eval_metric = CatBMetric(self._metric[0], task=self.task)

        return self._est_class(
            eval_metric=params.pop("eval_metric", eval_metric),
            train_dir=params.pop("train_dir", ""),
            allow_writing_files=params.pop("allow_writing_files", False),
            thread_count=params.pop("n_jobs", self.n_jobs),
            task_type=params.pop("task_type", "GPU" if self._gpu else "CPU"),
            devices=str(self._device_id),
            verbose=params.pop("verbose", False),
            random_state=params.pop("random_state", self.random_state),
            **params,
        )

    def _fit_estimator(
        self,
        estimator: PREDICTOR,
        data: tuple[DATAFRAME, PANDAS],
        est_params_fit: dict,
        validation: tuple[DATAFRAME, PANDAS] | None = None,
        trial: Trial | None = None,
    ):
        """Fit the estimator and perform in-training validation.

        Parameters
        ----------
        estimator: Predictor
            Instance to fit.

        data: tuple
            Training data of the form (X, y).

        est_params_fit: dict
            Additional parameters for the estimator's fit method.

        validation: tuple or None
            Validation data of the form (X, y). If None, no validation
            is performed.

        trial: [Trial][] or None
            Active trial (during hyperparameter tuning).

        Returns
        -------
        Predictor
            Fitted instance.

        """
        # Bind the metric name up front; it's needed both in the
        # validation branch and in the pruning branch below (previously
        # it was only bound under `if validation`, raising NameError on
        # a pruned trial without validation data)
        m = self._metric[0].name
        params = est_params_fit.copy()

        callbacks = params.pop("callbacks", [])
        if trial and len(self._metric) == 1 and not self._gpu:
            callbacks.append(cb := CatBoostPruningCallback(trial, "CatBMetric"))

        # gpu implementation fails if callbacks!=None
        estimator.fit(*data, eval_set=validation, callbacks=callbacks or None, **params)

        if not self._gpu:
            if validation:
                # Create evals attribute with train and validation scores
                evals = estimator.evals_result_
                self._evals[f"{m}_train"] = evals["learn"]["CatBMetric"]
                self._evals[f"{m}_test"] = evals["validation"]["CatBMetric"]

            if trial and len(self._metric) == 1 and cb._pruned:
                # Add the pruned step to the output (use the private
                # _evals consistently with the assignments above)
                step = len(self._evals[f"{m}_train"])
                steps = estimator.get_params()[self.has_validation]
                trial.params[self.has_validation] = f"{step}/{steps}"

                trial.set_user_attr("estimator", estimator)
                raise TrialPruned(cb._message)

        return estimator

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        return CustomDict(
            n_estimators=Int(20, 500, step=10),
            learning_rate=Float(0.01, 1.0, log=True),
            max_depth=Cat([None, *range(1, 17)]),
            min_child_samples=Int(1, 30),
            bootstrap_type=Cat(["Bayesian", "Bernoulli"]),
            bagging_temperature=Float(0, 10),
            subsample=Float(0.5, 1.0, step=0.1),
            reg_lambda=Float(0.001, 100, log=True),
        )
+
+
class CategoricalNB(ClassRegModel):
    """Categorical Naive Bayes.

    Implementation of the Naive Bayes algorithm for data whose
    features are categorically distributed.

    Corresponding estimators are:

    - [CategoricalNB][categoricalnbclass] for classification tasks.

    Read more in sklearn's [documentation][catnbdocs].

    See Also
    --------
    atom.models:BernoulliNB
    atom.models:ComplementNB
    atom.models:GaussianNB

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    import numpy as np

    X = np.random.randint(5, size=(100, 100))
    y = np.random.randint(2, size=100)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="CatNB", metric="f1", verbose=2)
    ```

    """

    acronym = "CatNB"
    needs_scaling = False
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn", "cuml"]

    _module = "naive_bayes"
    _estimators = CustomDict({"class": "CategoricalNB"})

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict()
        dist["alpha"] = Float(0.01, 10, log=True)
        dist["fit_prior"] = Cat([True, False])
        return dist
+
+
class ComplementNB(ClassRegModel):
    """Complement Naive Bayes.

    The Complement Naive Bayes classifier corrects the "severe
    assumptions" of the standard [MultinomialNB][] classifier, which
    makes it especially well suited for imbalanced datasets.

    Corresponding estimators are:

    - [ComplementNB][complementnbclass] for classification tasks.

    Read more in sklearn's [documentation][cnbdocs].

    See Also
    --------
    atom.models:BernoulliNB
    atom.models:CategoricalNB
    atom.models:MultinomialNB

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="CNB", metric="f1", verbose=2)
    ```

    """

    acronym = "CNB"
    needs_scaling = False
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn", "cuml"]

    _module = "naive_bayes"
    _estimators = CustomDict({"class": "ComplementNB"})

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict()
        dist["alpha"] = Float(0.01, 10, log=True)
        dist["fit_prior"] = Cat([True, False])
        dist["norm"] = Cat([True, False])
        return dist
+
+
class DecisionTree(ClassRegModel):
    """Single Decision Tree.

    A single decision tree classifier/regressor.

    Corresponding estimators are:

    - [DecisionTreeClassifier][] for classification tasks.
    - [DecisionTreeRegressor][] for regression tasks.

    Read more in sklearn's [documentation][treedocs].

    See Also
    --------
    atom.models:ExtraTree
    atom.models:ExtraTrees
    atom.models:RandomForest

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="Tree", metric="f1", verbose=2)
    ```

    """

    acronym = "Tree"
    needs_scaling = False
    accepts_sparse = True
    native_multilabel = True
    native_multioutput = True
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "tree"
    _estimators = CustomDict(
        {"class": "DecisionTreeClassifier", "reg": "DecisionTreeRegressor"}
    )

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        # The supported split criteria depend on the task
        criterion = (
            ["gini", "entropy"]
            if self.goal == "class"
            else ["squared_error", "absolute_error", "friedman_mse", "poisson"]
        )

        return CustomDict(
            criterion=Cat(criterion),
            splitter=Cat(["best", "random"]),
            max_depth=Cat([None, *range(1, 17)]),
            min_samples_split=Int(2, 20),
            min_samples_leaf=Int(1, 20),
            max_features=Cat([None, "sqrt", "log2", 0.5, 0.6, 0.7, 0.8, 0.9]),
            ccp_alpha=Float(0, 0.035, step=0.005),
        )
+
+
class Dummy(ClassRegModel):
    """Dummy classifier/regressor.

    A simple sanity check for supervised learning consists of
    comparing an estimator against basic rules of thumb. This model's
    prediction methods ignore the input data entirely. Don't use it
    for real problems; it only serves as a baseline to compare other
    models against.

    Corresponding estimators are:

    - [DummyClassifier][] for classification tasks.
    - [DummyRegressor][] for regression tasks.

    Read more in sklearn's [documentation][dummydocs].

    See Also
    --------
    atom.models:DecisionTree
    atom.models:ExtraTree
    atom.models:NaiveForecaster

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="Dummy", metric="f1", verbose=2)
    ```

    """

    acronym = "Dummy"
    needs_scaling = False
    accepts_sparse = False
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "dummy"
    _estimators = CustomDict({"class": "DummyClassifier", "reg": "DummyRegressor"})

    def _get_parameters(self, trial: Trial) -> CustomDict:
        """Get the trial's hyperparameters.

        Parameters
        ----------
        trial: [Trial][]
            Current trial.

        Returns
        -------
        CustomDict
            Trial's hyperparameters.

        """
        params = super()._get_parameters(trial)

        # The quantile parameter is only used by the quantile strategy
        strategy = self._get_param("strategy", params)
        if strategy != "quantile":
            params.pop("quantile")

        return params

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        if self.goal == "class":
            return CustomDict(
                strategy=Cat(["most_frequent", "prior", "stratified", "uniform"]),
            )

        return CustomDict(
            strategy=Cat(["mean", "median", "quantile"]),
            quantile=Float(0, 1.0, step=0.1),
        )
+
+
class ElasticNet(ClassRegModel):
    """Linear Regression with elasticnet regularization.

    Linear least squares combining both l1 and l2 regularization.

    Corresponding estimators are:

    - [ElasticNet][elasticnetreg] for regression tasks.

    Read more in sklearn's [documentation][endocs].

    See Also
    --------
    atom.models:Lasso
    atom.models:OrdinaryLeastSquares
    atom.models:Ridge

    Examples
    --------
    ```pycon
    from atom import ATOMRegressor
    from sklearn.datasets import fetch_california_housing

    X, y = fetch_california_housing(return_X_y=True)

    atom = ATOMRegressor(X, y, random_state=1)
    atom.run(models="EN", metric="r2", verbose=2)
    ```

    """

    acronym = "EN"
    needs_scaling = True
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn", "sklearnex", "cuml"]

    _module = "linear_model"
    _estimators = CustomDict({"reg": "ElasticNet"})

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict()
        dist["alpha"] = Float(1e-3, 10, log=True)
        dist["l1_ratio"] = Float(0.1, 0.9, step=0.1)
        dist["selection"] = Cat(["cyclic", "random"])
        return dist
+
+
class ExtraTree(ClassRegModel):
    """Extremely Randomized Tree.

    Extra-trees are built differently from classic decision trees.
    When searching for the best split to separate a node's samples
    into two groups, random splits are drawn for each of the
    max_features randomly selected features, and the best of those is
    chosen. With max_features set to 1, this amounts to building a
    totally random decision tree.

    Corresponding estimators are:

    - [ExtraTreeClassifier][] for classification tasks.
    - [ExtraTreeRegressor][] for regression tasks.

    Read more in sklearn's [documentation][treedocs].

    See Also
    --------
    atom.models:DecisionTree
    atom.models:ExtraTrees
    atom.models:RandomForest

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="ETree", metric="f1", verbose=2)
    ```

    """

    acronym = "ETree"
    needs_scaling = False
    accepts_sparse = True
    native_multilabel = True
    native_multioutput = True
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "tree"
    _estimators = CustomDict(
        {"class": "ExtraTreeClassifier", "reg": "ExtraTreeRegressor"}
    )

    def _get_parameters(self, trial: Trial) -> CustomDict:
        """Get the trial's hyperparameters.

        Parameters
        ----------
        trial: [Trial][]
            Current trial.

        Returns
        -------
        CustomDict
            Trial's hyperparameters.

        """
        params = super()._get_parameters(trial)

        # max_samples is meaningless without bootstrapping
        # NOTE(review): neither key appears in this class's predefined
        # distributions — presumably this guard covers user-provided
        # distributions; confirm.
        if not self._get_param("bootstrap", params):
            params.pop("max_samples")

        return params

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        # The supported split criteria depend on the task
        criterion = (
            ["gini", "entropy"]
            if self.goal == "class"
            else ["squared_error", "absolute_error"]
        )

        return CustomDict(
            criterion=Cat(criterion),
            splitter=Cat(["random", "best"]),
            max_depth=Cat([None, *range(1, 17)]),
            min_samples_split=Int(2, 20),
            min_samples_leaf=Int(1, 20),
            max_features=Cat([None, "sqrt", "log2", 0.5, 0.6, 0.7, 0.8, 0.9]),
            ccp_alpha=Float(0, 0.035, step=0.005),
        )
+
+
class ExtraTrees(ClassRegModel):
    """Extremely Randomized Trees.

    Extra-Trees is a meta estimator that fits a number of randomized
    decision trees (a.k.a. [extra-trees][extratree]) on various
    sub-samples of the dataset, averaging them to improve predictive
    accuracy and to control over-fitting.

    Corresponding estimators are:

    - [ExtraTreesClassifier][] for classification tasks.
    - [ExtraTreesRegressor][] for regression tasks.

    Read more in sklearn's [documentation][etdocs].

    See Also
    --------
    atom.models:DecisionTree
    atom.models:ExtraTree
    atom.models:RandomForest

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="ET", metric="f1", verbose=2)
    ```

    """

    acronym = "ET"
    needs_scaling = False
    accepts_sparse = True
    native_multilabel = True
    native_multioutput = True
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "ensemble"
    _estimators = CustomDict(
        {"class": "ExtraTreesClassifier", "reg": "ExtraTreesRegressor"}
    )

    def _get_parameters(self, trial: Trial) -> CustomDict:
        """Get the trial's hyperparameters.

        Parameters
        ----------
        trial: [Trial][]
            Current trial.

        Returns
        -------
        CustomDict
            Trial's hyperparameters.

        """
        params = super()._get_parameters(trial)

        # max_samples only applies when sampling with replacement
        if not self._get_param("bootstrap", params):
            params.pop("max_samples")

        return params

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        # The supported split criteria depend on the task
        criterion = (
            ["gini", "entropy"]
            if self.goal == "class"
            else ["squared_error", "absolute_error"]
        )

        return CustomDict(
            n_estimators=Int(10, 500, step=10),
            criterion=Cat(criterion),
            max_depth=Cat([None, *range(1, 17)]),
            min_samples_split=Int(2, 20),
            min_samples_leaf=Int(1, 20),
            max_features=Cat([None, "sqrt", "log2", 0.5, 0.6, 0.7, 0.8, 0.9]),
            bootstrap=Cat([True, False]),
            max_samples=Cat([None, 0.5, 0.6, 0.7, 0.8, 0.9]),
            ccp_alpha=Float(0, 0.035, step=0.005),
        )
+
+
class GaussianNB(ClassRegModel):
    """Gaussian Naive Bayes.

    Implementation of the Naive Bayes classification algorithm where
    the likelihood of the features is assumed to be Gaussian.

    Corresponding estimators are:

    - [GaussianNB][gaussiannbclass] for classification tasks.

    Read more in sklearn's [documentation][gnbdocs].

    See Also
    --------
    atom.models:BernoulliNB
    atom.models:CategoricalNB
    atom.models:ComplementNB

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="GNB", metric="f1", verbose=2)
    ```

    """

    acronym = "GNB"
    needs_scaling = False
    accepts_sparse = False
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn", "cuml"]

    _module = "naive_bayes"
    _estimators = CustomDict({"class": "GaussianNB"})
+
+
class GaussianProcess(ClassRegModel):
    """Gaussian process.

    Gaussian Processes are a generic supervised learning method aimed
    at regression and probabilistic classification problems. Their
    advantages are:

    * The prediction interpolates the observations.
    * The prediction is probabilistic (Gaussian), so empirical
      confidence intervals can be computed and used to decide whether
      the prediction should be refit (online fitting, adaptive
      fitting) in some region of interest.

    Their disadvantages include:

    * They are not sparse, i.e. they use the whole samples/features
      information to perform the prediction.
    * They lose efficiency in high dimensional spaces, namely when the
      number of features exceeds a few dozens.

    Corresponding estimators are:

    - [GaussianProcessClassifier][] for classification tasks.
    - [GaussianProcessRegressor][] for regression tasks.

    Read more in sklearn's [documentation][gpdocs].

    See Also
    --------
    atom.models:GaussianNB
    atom.models:LinearDiscriminantAnalysis
    atom.models:PassiveAggressive

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="GP", metric="f1", verbose=2)
    ```

    """

    acronym = "GP"
    needs_scaling = False
    accepts_sparse = False
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "gaussian_process"
    _estimators = CustomDict(
        {"class": "GaussianProcessClassifier", "reg": "GaussianProcessRegressor"}
    )
+
+
class GradientBoostingMachine(ClassRegModel):
    """Gradient Boosting Machine.

    A Gradient Boosting Machine builds an additive model in a forward
    stage-wise fashion, allowing the optimization of arbitrary
    differentiable loss functions. At each stage, `n_classes_`
    regression trees are fit on the negative gradient of the loss
    function, e.g. binary or multiclass log loss. Binary
    classification is a special case where only a single regression
    tree is induced.

    Corresponding estimators are:

    - [GradientBoostingClassifier][] for classification tasks.
    - [GradientBoostingRegressor][] for regression tasks.

    Read more in sklearn's [documentation][gbmdocs].

    !!! tip
        [HistGradientBoosting][] is a much faster variant of this
        algorithm for intermediate datasets (n_samples >= 10k).

    See Also
    --------
    atom.models:CatBoost
    atom.models:HistGradientBoosting
    atom.models:LightGBM

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="GBM", metric="f1", verbose=2)
    ```

    """

    acronym = "GBM"
    needs_scaling = False
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "ensemble"
    _estimators = CustomDict(
        {"class": "GradientBoostingClassifier", "reg": "GradientBoostingRegressor"}
    )

    def _get_parameters(self, trial: Trial) -> CustomDict:
        """Get the trial's hyperparameters.

        Parameters
        ----------
        trial: [Trial][]
            Current trial.

        Returns
        -------
        CustomDict
            Trial's hyperparameters.

        """
        params = super()._get_parameters(trial)

        # alpha is only used by the huber and quantile losses
        loss = self._get_param("loss", params)
        if loss not in ("huber", "quantile"):
            params.pop("alpha")

        return params

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict(
            loss=Cat(["log_loss", "exponential"]),
            learning_rate=Float(0.01, 1.0, log=True),
            n_estimators=Int(10, 500, step=10),
            subsample=Float(0.5, 1.0, step=0.1),
            criterion=Cat(["friedman_mse", "squared_error"]),
            min_samples_split=Int(2, 20),
            min_samples_leaf=Int(1, 20),
            max_depth=Int(1, 21),
            max_features=Cat([None, "sqrt", "log2", 0.5, 0.6, 0.7, 0.8, 0.9]),
            ccp_alpha=Float(0, 0.035, step=0.005),
        )

        if self.task.startswith("multiclass"):
            # Multiclass only supports log_loss
            dist.pop("loss")
        elif self.goal.startswith("reg"):
            # Regression uses its own losses and adds the alpha parameter
            dist["loss"] = Cat(["squared_error", "absolute_error", "huber", "quantile"])
            dist["alpha"] = Float(0.1, 0.9, step=0.1)

        return dist
+
+
class HuberRegression(ClassRegModel):
    """Huber regressor.

    Huber is a linear regression model robust to outliers. It ensures
    the loss function isn't heavily influenced by the outliers while
    not completely ignoring their effect.

    Corresponding estimators are:

    - [HuberRegressor][] for regression tasks.

    Read more in sklearn's [documentation][huberdocs].

    See Also
    --------
    atom.models:AutomaticRelevanceDetermination
    atom.models:LeastAngleRegression
    atom.models:OrdinaryLeastSquares

    Examples
    --------
    ```pycon
    from atom import ATOMRegressor
    from sklearn.datasets import fetch_california_housing

    X, y = fetch_california_housing(return_X_y=True)

    atom = ATOMRegressor(X, y, random_state=1)
    atom.run(models="Huber", metric="r2", verbose=2)
    ```

    """

    acronym = "Huber"
    needs_scaling = True
    accepts_sparse = False
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "linear_model"
    _estimators = CustomDict({"reg": "HuberRegressor"})

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict()
        dist["epsilon"] = Float(1, 10, log=True)
        dist["max_iter"] = Int(50, 500, step=10)
        dist["alpha"] = Float(1e-4, 1, log=True)
        return dist
+
+
class HistGradientBoosting(ClassRegModel):
    """Histogram-based Gradient Boosting Machine.

    The Histogram-based Gradient Boosting Machine is much faster than
    the standard [GradientBoostingMachine][] on big datasets
    (n_samples>=10k). It first bins the input samples into
    integer-valued bins, which tremendously reduces the number of
    splitting points to consider and lets the algorithm leverage
    integer-based data structures (histograms) instead of sorted
    continuous values when building the trees.

    Corresponding estimators are:

    - [HistGradientBoostingClassifier][] for classification tasks.
    - [HistGradientBoostingRegressor][] for regression tasks.

    Read more in sklearn's [documentation][hgbmdocs].

    See Also
    --------
    atom.models:CatBoost
    atom.models:GradientBoostingMachine
    atom.models:XGBoost

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="hGBM", metric="f1", verbose=2)
    ```

    """

    acronym = "hGBM"
    needs_scaling = False
    accepts_sparse = False
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "ensemble"
    _estimators = CustomDict(
        {
            "class": "HistGradientBoostingClassifier",
            "reg": "HistGradientBoostingRegressor",
        }
    )

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict(
            loss=Cat(["squared_error", "absolute_error", "poisson", "quantile", "gamma"]),
            learning_rate=Float(0.01, 1.0, log=True),
            max_iter=Int(10, 500, step=10),
            max_leaf_nodes=Int(10, 50),
            max_depth=Cat([None, *range(1, 17)]),
            min_samples_leaf=Int(10, 30),
            l2_regularization=Float(0, 1.0, step=0.1),
        )

        # The loss parameter only applies to the regressor
        if self.goal == "class":
            dist.pop("loss")

        return dist
+
+
class KNearestNeighbors(ClassRegModel):
    """K-Nearest Neighbors.

    K-Nearest Neighbors, as the name clearly indicates, implements the
    k-nearest neighbors vote. For regression, the target is predicted
    by local interpolation of the targets associated of the nearest
    neighbors in the training set.

    Corresponding estimators are:

    - [KNeighborsClassifier][] for classification tasks.
    - [KNeighborsRegressor][] for regression tasks.

    Read more in sklearn's [documentation][knndocs].

    See Also
    --------
    atom.models:LinearDiscriminantAnalysis
    atom.models:QuadraticDiscriminantAnalysis
    atom.models:RadiusNearestNeighbors

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="KNN", metric="f1", verbose=2)
    ```

    """

    acronym = "KNN"
    needs_scaling = True
    accepts_sparse = True
    native_multilabel = True
    native_multioutput = True
    has_validation = None
    supports_engines = ["sklearn", "sklearnex", "cuml"]

    _module = "neighbors"
    _estimators = CustomDict(
        {"class": "KNeighborsClassifier", "reg": "KNeighborsRegressor"}
    )

    def _get_distributions(self) -> CustomDict:
        """Get the predefined hyperparameter distributions.

        The space is trimmed for accelerated engines, which support
        only a subset of the hyperparameters.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict(
            n_neighbors=Int(1, 100),
            weights=Cat(["uniform", "distance"]),
            algorithm=Cat(["auto", "ball_tree", "kd_tree", "brute"]),
            leaf_size=Int(20, 40),
            p=Int(1, 2),
        )

        if self._gpu:
            dist.pop("algorithm")  # Only 'brute' is supported
            if self.engine.get("estimator") == "cuml":
                dist.pop("weights")  # Only 'uniform' is supported
                dist.pop("leaf_size")
                dist.pop("p")

        return dist
+
+
class Lasso(ClassRegModel):
    """Linear Regression with lasso regularization.

    Linear least squares with l1 regularization.

    Corresponding estimators are:

    - [Lasso][lassoreg] for regression tasks.

    Read more in sklearn's [documentation][lassodocs].

    See Also
    --------
    atom.models:ElasticNet
    atom.models:OrdinaryLeastSquares
    atom.models:Ridge

    Examples
    --------
    ```pycon
    from atom import ATOMRegressor
    from sklearn.datasets import fetch_california_housing

    X, y = fetch_california_housing(return_X_y=True)

    atom = ATOMRegressor(X, y, random_state=1)
    atom.run(models="Lasso", metric="r2", verbose=2)
    ```

    """

    acronym = "Lasso"
    needs_scaling = True
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn", "sklearnex", "cuml"]

    _module = "linear_model"
    _estimators = CustomDict({"reg": "Lasso"})

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict()
        dist["alpha"] = Float(1e-3, 10, log=True)
        dist["selection"] = Cat(["cyclic", "random"])
        return dist
+
+
class LeastAngleRegression(ClassRegModel):
    """Least Angle Regression.

    Least-Angle Regression is a regression algorithm for
    high-dimensional data. Lars resembles forward stepwise regression:
    at each step, it finds the feature most correlated with the
    target. When multiple features have equal correlation, instead of
    continuing along the same feature, it proceeds in a direction
    equiangular between the features.

    Corresponding estimators are:

    - [Lars][] for regression tasks.

    Read more in sklearn's [documentation][larsdocs].

    See Also
    --------
    atom.models:BayesianRidge
    atom.models:HuberRegression
    atom.models:OrdinaryLeastSquares

    Examples
    --------
    ```pycon
    from atom import ATOMRegressor
    from sklearn.datasets import fetch_california_housing

    X, y = fetch_california_housing(return_X_y=True)

    atom = ATOMRegressor(X, y, random_state=1)
    atom.run(models="Lars", metric="r2", verbose=2)
    ```

    """

    acronym = "Lars"
    needs_scaling = True
    accepts_sparse = False
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "linear_model"
    _estimators = CustomDict({"reg": "Lars"})
+
+
class LightGBM(ClassRegModel):
    """Light Gradient Boosting Machine.

    LightGBM is a gradient boosting model that uses tree based learning
    algorithms. It is designed to be distributed and efficient with the
    following advantages:

    - Faster training speed and higher efficiency.
    - Lower memory usage.
    - Better accuracy.
    - Capable of handling large-scale data.

    Corresponding estimators are:

    - [LGBMClassifier][] for classification tasks.
    - [LGBMRegressor][] for regression tasks.

    Read more in LightGBM's [documentation][lgbdocs].

    !!! info
        Using LightGBM's [GPU acceleration][estimator-acceleration]
        requires [additional software dependencies][lgb_gpu].

    See Also
    --------
    atom.models:CatBoost
    atom.models:GradientBoostingMachine
    atom.models:XGBoost

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="LGB", metric="f1", verbose=2)
    ```

    """

    acronym = "LGB"
    needs_scaling = True
    accepts_sparse = True
    native_multilabel = False
    native_multioutput = False
    has_validation = "n_estimators"
    supports_engines = ["lightgbm"]

    _module = "lightgbm.sklearn"
    _estimators = CustomDict({"class": "LGBMClassifier", "reg": "LGBMRegressor"})

    def _get_est(self, **params) -> PREDICTOR:
        """Get the model's estimator with unpacked parameters.

        Parameters
        ----------
        **params
            Unpacked hyperparameters for the estimator. Explicitly
            passed keys take precedence over the defaults below.

        Returns
        -------
        Predictor
            Estimator instance.

        """
        # Custom lightgbm mapping for warnings
        # PYTHONWARNINGS doesn't work since they go from C/C++ code to stdout
        warns = dict(always=2, default=1, error=0, ignore=-1)

        return self._est_class(
            verbose=params.pop("verbose", warns.get(self.warnings, -1)),
            n_jobs=params.pop("n_jobs", self.n_jobs),
            device=params.pop("device", "gpu" if self._gpu else "cpu"),
            gpu_device_id=params.pop("gpu_device_id", self._device_id or -1),
            random_state=params.pop("random_state", self.random_state),
            **params,
        )

    def _fit_estimator(
        self,
        estimator: PREDICTOR,
        data: tuple[DATAFRAME, PANDAS],
        est_params_fit: dict,
        validation: tuple[DATAFRAME, PANDAS] | None = None,
        trial: Trial | None = None,
    ):
        """Fit the estimator and perform in-training validation.

        Parameters
        ----------
        estimator: Predictor
            Instance to fit.

        data: tuple
            Training data of the form (X, y).

        est_params_fit: dict
            Additional parameters for the estimator's fit method.

        validation: tuple or None
            Validation data of the form (X, y). If None, no validation
            is performed.

        trial: [Trial][] or None
            Active trial (during hyperparameter tuning).

        Returns
        -------
        Predictor
            Fitted instance.

        """
        from lightgbm.callback import log_evaluation

        # NOTE(review): unlike the eval_metric guard below, this assumes
        # self._metric is always set — confirm.
        m = self._metric[0].name
        # Copy so the caller's fit parameters are never mutated
        params = est_params_fit.copy()

        # log_evaluation(-1) silences lightgbm's per-iteration output
        callbacks = params.pop("callbacks", []) + [log_evaluation(-1)]
        # Pruning is only wired up for single-metric trials; it watches
        # the second eval set ("valid_1", the validation data)
        if trial and len(self._metric) == 1:
            callbacks.append(LightGBMPruningCallback(trial, m, "valid_1"))

        eval_metric = None
        if getattr(self, "_metric", None):
            eval_metric = LGBMetric(self._metric[0], task=self.task)

        try:
            estimator.fit(
                *data,
                eval_set=[data, validation] if validation else None,
                eval_metric=params.pop("eval_metric", eval_metric),
                callbacks=callbacks,
                **params,
            )
        except TrialPruned as ex:
            # Add the pruned step to the output
            # The pruned iteration is parsed from the exception message
            step = str(ex).split(" ")[-1][:-1]
            steps = estimator.get_params()[self.has_validation]
            trial.params[self.has_validation] = f"{step}/{steps}"

            # Keep the partially-fitted estimator available on the trial
            trial.set_user_attr("estimator", estimator)
            raise ex

        if validation:
            # Create evals attribute with train and validation scores
            self._evals[f"{m}_train"] = estimator.evals_result_["training"][m]
            self._evals[f"{m}_test"] = estimator.evals_result_["valid_1"][m]

        return estimator

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        return CustomDict(
            n_estimators=Int(20, 500, step=10),
            learning_rate=Float(0.01, 1.0, log=True),
            max_depth=Int(-1, 17, step=2),
            num_leaves=Int(20, 40),
            min_child_weight=Float(1e-4, 100, log=True),
            min_child_samples=Int(1, 30),
            subsample=Float(0.5, 1.0, step=0.1),
            colsample_bytree=Float(0.4, 1.0, step=0.1),
            reg_alpha=Float(1e-4, 100, log=True),
            reg_lambda=Float(1e-4, 100, log=True),
        )
+
+
class LinearDiscriminantAnalysis(ClassRegModel):
    """Linear Discriminant Analysis.

    Linear Discriminant Analysis is a classifier with a linear
    decision boundary, obtained by fitting class conditional densities
    to the data and applying Bayes' rule. The model fits a Gaussian
    density to each class, under the assumption that all classes share
    the same covariance matrix.

    Corresponding estimators are:

    - [LinearDiscriminantAnalysis][ldaclassifier] for classification tasks.

    Read more in sklearn's [documentation][ldadocs].

    See Also
    --------
    atom.models:LogisticRegression
    atom.models:RadiusNearestNeighbors
    atom.models:QuadraticDiscriminantAnalysis

    Examples
    --------
    ```pycon
    from atom import ATOMClassifier
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    atom = ATOMClassifier(X, y, random_state=1)
    atom.run(models="LDA", metric="f1", verbose=2)
    ```

    """

    acronym = "LDA"
    needs_scaling = False
    accepts_sparse = False
    native_multilabel = False
    native_multioutput = False
    has_validation = None
    supports_engines = ["sklearn"]

    _module = "discriminant_analysis"
    _estimators = CustomDict({"class": "LinearDiscriminantAnalysis"})

    def _get_parameters(self, trial: Trial) -> CustomDict:
        """Get the trial's hyperparameters.

        Parameters
        ----------
        trial: [Trial][]
            Current trial.

        Returns
        -------
        CustomDict
            Trial's hyperparameters.

        """
        params = super()._get_parameters(trial)

        # Shrinkage is not supported by the svd solver
        solver = self._get_param("solver", params)
        if solver == "svd":
            params.pop("shrinkage")

        return params

    @staticmethod
    def _get_distributions() -> CustomDict:
        """Get the predefined hyperparameter distributions.

        Returns
        -------
        CustomDict
            Hyperparameter distributions.

        """
        dist = CustomDict()
        dist["solver"] = Cat(["svd", "lsqr", "eigen"])
        dist["shrinkage"] = Cat([None, "auto", 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
        return dist
+
+
+class LinearSVM(ClassRegModel):
+ """Linear Support Vector Machine.
+
+ Similar to [SupportVectorMachine][] but with a linear kernel.
+ Implemented in terms of liblinear rather than libsvm, so it has
+ more flexibility in the choice of penalties and loss functions and
+ should scale better to large numbers of samples.
+
+ Corresponding estimators are:
+
+ - [LinearSVC][] for classification tasks.
+    - [LinearSVR][] for regression tasks.
+
+ Read more in sklearn's [documentation][svmdocs].
+
+ See Also
+ --------
+ atom.models:KNearestNeighbors
+ atom.models:StochasticGradientDescent
+ atom.models:SupportVectorMachine
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(models="lSVM", metric="f1", verbose=2)
+ ```
+
+ """
+
+ acronym = "lSVM"
+ needs_scaling = True
+ accepts_sparse = True
+ native_multilabel = False
+ native_multioutput = False
+ has_validation = None
+ supports_engines = ["sklearn", "cuml"]
+
+ _module = "svm"
+ _estimators = CustomDict({"class": "LinearSVC", "reg": "LinearSVR"})
+
+ def _get_parameters(self, trial: Trial) -> CustomDict:
+ """Get the trial's hyperparameters.
+
+ Parameters
+ ----------
+ trial: [Trial][]
+ Current trial.
+
+ Returns
+ -------
+ CustomDict
+ Trial's hyperparameters.
+
+ """
+ params = super()._get_parameters(trial)
+
+ if self.goal == "class":
+ if self._get_param("loss", params) == "hinge":
+ # l1 regularization can't be combined with hinge
+ params.replace_value("penalty", "l2")
+ # l2 regularization can't be combined with hinge when dual=False
+ params.replace_value("dual", True)
+ elif self._get_param("loss", params) == "squared_hinge":
+ # l1 regularization can't be combined with squared_hinge when dual=True
+ if self._get_param("penalty", params) == "l1":
+ params.replace_value("dual", False)
+ elif self._get_param("loss", params) == "epsilon_insensitive":
+ params.replace_value("dual", True)
+
+ return params
+
+ def _get_est(self, **params) -> PREDICTOR:
+ """Get the estimator instance.
+
+ Parameters
+ ----------
+ **params
+ Unpacked hyperparameters for the estimator.
+
+ Returns
+ -------
+ Predictor
+ Estimator instance.
+
+ """
+ if self.engine.get("estimator") == "cuml" and self.goal == "class":
+ return self._est_class(probability=params.pop("probability", True), **params)
+ else:
+ return super()._get_est(**params)
+
+ def _get_distributions(self) -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ dist = CustomDict()
+ if self.goal == "class":
+ dist["penalty"] = Cat(["l1", "l2"])
+ dist["loss"] = Cat(["hinge", "squared_hinge"])
+ else:
+ dist["loss"] = Cat(["epsilon_insensitive", "squared_epsilon_insensitive"])
+
+ dist["C"] = Float(1e-3, 100, log=True)
+ dist["dual"] = Cat([True, False])
+
+ if self.engine.get("estimator") == "cuml":
+ dist.pop("dual")
+
+ return dist
+
+
+class LogisticRegression(ClassRegModel):
+ """Logistic Regression.
+
+ Logistic regression, despite its name, is a linear model for
+ classification rather than regression. Logistic regression is also
+ known in the literature as logit regression, maximum-entropy
+ classification (MaxEnt) or the log-linear classifier. In this model,
+ the probabilities describing the possible outcomes of a single trial
+ are modeled using a logistic function.
+
+ Corresponding estimators are:
+
+ - [LogisticRegression][] for classification tasks.
+
+ Read more in sklearn's [documentation][lrdocs].
+
+ See Also
+ --------
+ atom.models:GaussianProcess
+ atom.models:LinearDiscriminantAnalysis
+ atom.models:PassiveAggressive
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+    atom.run(models="LR", metric="f1", verbose=2)
+ ```
+
+ """
+
+ acronym = "LR"
+ needs_scaling = True
+ accepts_sparse = True
+ native_multilabel = False
+ native_multioutput = False
+ has_validation = None
+ supports_engines = ["sklearn", "sklearnex", "cuml"]
+
+ _module = "linear_model"
+ _estimators = CustomDict({"class": "LogisticRegression"})
+
+ def _get_parameters(self, trial: Trial) -> CustomDict:
+ """Get the trial's hyperparameters.
+
+ Parameters
+ ----------
+ trial: [Trial][]
+ Current trial.
+
+ Returns
+ -------
+ CustomDict
+ Trial's hyperparameters.
+
+ """
+ params = super()._get_parameters(trial)
+
+ # Limitations on penalty + solver combinations
+ penalty = self._get_param("penalty", params)
+ solver = self._get_param("solver", params)
+ cond_1 = penalty is None and solver == "liblinear"
+ cond_2 = penalty == "l1" and solver not in ("liblinear", "saga")
+ cond_3 = penalty == "elasticnet" and solver != "saga"
+
+ if cond_1 or cond_2 or cond_3:
+ params.replace_value("penalty", "l2") # Change to default value
+
+ if self._get_param("penalty", params) != "elasticnet":
+ params.pop("l1_ratio")
+
+ if self._get_param("penalty", params) is None:
+ params.pop("C")
+
+ return params
+
+ def _get_distributions(self) -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ dist = CustomDict(
+ penalty=Cat([None, "l1", "l2", "elasticnet"]),
+ C=Float(1e-3, 100, log=True),
+ solver=Cat(["lbfgs", "newton-cg", "liblinear", "sag", "saga"]),
+ max_iter=Int(100, 1000, step=10),
+ l1_ratio=Float(0, 1.0, step=0.1),
+ )
+
+ if self._gpu:
+ dist.pop("solver")
+ dist.pop("penalty") # Only 'l2' is supported
+ elif self.engine.get("estimator") == "sklearnex":
+ dist["solver"] = Cat(["lbfgs", "newton-cg"])
+
+ return dist
+
+
+class MultiLayerPerceptron(ClassRegModel):
+ """Multi-layer Perceptron.
+
+ Multi-layer Perceptron is a supervised learning algorithm that
+ learns a function by training on a dataset. Given a set of features
+ and a target, it can learn a non-linear function approximator for
+ either classification or regression. It is different from logistic
+ regression, in that between the input and the output layer, there
+ can be one or more non-linear layers, called hidden layers.
+
+ Corresponding estimators are:
+
+ - [MLPClassifier][] for classification tasks.
+ - [MLPRegressor][] for regression tasks.
+
+ Read more in sklearn's [documentation][mlpdocs].
+
+ See Also
+ --------
+ atom.models:PassiveAggressive
+ atom.models:Perceptron
+ atom.models:StochasticGradientDescent
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(models="MLP", metric="f1", verbose=2)
+ ```
+
+ """
+
+ acronym = "MLP"
+ needs_scaling = True
+ accepts_sparse = True
+ native_multilabel = True
+ native_multioutput = False
+ has_validation = "max_iter"
+ supports_engines = ["sklearn"]
+
+ _module = "neural_network"
+ _estimators = CustomDict({"class": "MLPClassifier", "reg": "MLPRegressor"})
+
+ def _get_parameters(self, trial: Trial) -> CustomDict:
+ """Get the trial's hyperparameters.
+
+ Parameters
+ ----------
+ trial: [Trial][]
+ Current trial.
+
+ Returns
+ -------
+ CustomDict
+ Trial's hyperparameters.
+
+ """
+ params = super()._get_parameters(trial)
+
+ # Drop layers when a previous layer has 0 neurons
+ drop = False
+ for param in [p for p in sorted(params) if p.startswith("hidden_layer")]:
+ if params[param] == 0 or drop:
+ drop = True
+ params.pop(param)
+
+ if self._get_param("solver", params) != "sgd":
+ params.pop("learning_rate")
+ params.pop("power_t")
+ else:
+ params.pop("learning_rate_init")
+
+ return params
+
+ def _trial_to_est(self, params: CustomDict) -> CustomDict:
+ """Convert trial's hyperparameters to parameters for the estimator.
+
+ Parameters
+ ----------
+ params: CustomDict
+ Trial's hyperparameters.
+
+ Returns
+ -------
+ CustomDict
+ Estimator's hyperparameters.
+
+ """
+ params = super()._trial_to_est(params)
+
+ hidden_layer_sizes = []
+ for param in [p for p in sorted(params) if p.startswith("hidden_layer")]:
+ hidden_layer_sizes.append(params.pop(param))
+
+ if hidden_layer_sizes:
+ params.insert(0, "hidden_layer_sizes", tuple(hidden_layer_sizes))
+
+ return params
+
+ def _get_distributions(self) -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ dist = CustomDict(
+ hidden_layer_1=Int(10, 100),
+ hidden_layer_2=Int(0, 100),
+ hidden_layer_3=Int(0, 10),
+ activation=Cat(["identity", "logistic", "tanh", "relu"]),
+ solver=Cat(["lbfgs", "sgd", "adam"]),
+ alpha=Float(1e-4, 0.1, log=True),
+ batch_size=Cat(["auto", 8, 16, 32, 64, 128, 256]),
+ learning_rate=Cat(["constant", "invscaling", "adaptive"]),
+ learning_rate_init=Float(1e-3, 0.1, log=True),
+ power_t=Float(0.1, 0.9, step=0.1),
+ max_iter=Int(50, 500, step=10),
+ )
+
+ # Drop layers if sizes are specified by user
+ return dist[3:] if "hidden_layer_sizes" in self._est_params else dist
+
+
+class MultinomialNB(ClassRegModel):
+ """Multinomial Naive Bayes.
+
+ MultinomialNB implements the Naive Bayes algorithm for multinomially
+ distributed data, and is one of the two classic Naive Bayes variants
+ used in text classification (where the data are typically
+ represented as word vector counts, although tf-idf vectors are also
+ known to work well in practice).
+
+ Corresponding estimators are:
+
+ - [MultinomialNB][multinomialnbclass] for classification tasks.
+
+ Read more in sklearn's [documentation][mnbdocs].
+
+ See Also
+ --------
+ atom.models:BernoulliNB
+ atom.models:ComplementNB
+ atom.models:GaussianNB
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(models="MNB", metric="f1", verbose=2)
+ ```
+
+ """
+
+ acronym = "MNB"
+ needs_scaling = False
+ accepts_sparse = True
+ native_multilabel = False
+ native_multioutput = False
+ has_validation = None
+ supports_engines = ["sklearn", "cuml"]
+
+ _module = "naive_bayes"
+ _estimators = CustomDict({"class": "MultinomialNB"})
+
+ @staticmethod
+ def _get_distributions() -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ return CustomDict(
+ alpha=Float(0.01, 10, log=True),
+ fit_prior=Cat([True, False]),
+ )
+
+
+class OrdinaryLeastSquares(ClassRegModel):
+ """Linear Regression.
+
+ Ordinary Least Squares is just linear regression without any
+ regularization. It fits a linear model with coefficients `w=(w1,
+ ..., wp)` to minimize the residual sum of squares between the
+ observed targets in the dataset, and the targets predicted by the
+ linear approximation.
+
+ Corresponding estimators are:
+
+ - [LinearRegression][] for regression tasks.
+
+ Read more in sklearn's [documentation][olsdocs].
+
+ See Also
+ --------
+ atom.models:ElasticNet
+ atom.models:Lasso
+ atom.models:Ridge
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMRegressor
+ from sklearn.datasets import fetch_california_housing
+
+ X, y = fetch_california_housing(return_X_y=True)
+
+ atom = ATOMRegressor(X, y, random_state=1)
+ atom.run(models="OLS", metric="r2", verbose=2)
+ ```
+
+ """
+
+ acronym = "OLS"
+ needs_scaling = True
+ accepts_sparse = True
+ native_multilabel = False
+ native_multioutput = False
+ has_validation = None
+ supports_engines = ["sklearn", "sklearnex", "cuml"]
+
+ _module = "linear_model"
+ _estimators = CustomDict({"reg": "LinearRegression"})
+
+
+class OrthogonalMatchingPursuit(ClassRegModel):
+ """Orthogonal Matching Pursuit.
+
+ Orthogonal Matching Pursuit implements the OMP algorithm for
+ approximating the fit of a linear model with constraints imposed
+ on the number of non-zero coefficients.
+
+ Corresponding estimators are:
+
+ - [OrthogonalMatchingPursuit][] for regression tasks.
+
+ Read more in sklearn's [documentation][ompdocs].
+
+ See Also
+ --------
+ atom.models:Lasso
+ atom.models:LeastAngleRegression
+ atom.models:OrdinaryLeastSquares
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMRegressor
+ from sklearn.datasets import fetch_california_housing
+
+ X, y = fetch_california_housing(return_X_y=True)
+
+ atom = ATOMRegressor(X, y, random_state=1)
+ atom.run(models="OMP", metric="r2", verbose=2)
+ ```
+
+ """
+
+ acronym = "OMP"
+ needs_scaling = True
+ accepts_sparse = False
+ native_multilabel = False
+ native_multioutput = False
+ has_validation = None
+ supports_engines = ["sklearn"]
+
+ _module = "linear_model"
+ _estimators = CustomDict({"reg": "OrthogonalMatchingPursuit"})
+
+
+class PassiveAggressive(ClassRegModel):
+ """Passive Aggressive.
+
+ The passive-aggressive algorithms are a family of algorithms for
+ large-scale learning. They are similar to the Perceptron in that
+ they do not require a learning rate. However, contrary to the
+ [Perceptron][], they include a regularization parameter `C`.
+
+ Corresponding estimators are:
+
+ - [PassiveAggressiveClassifier][] for classification tasks.
+    - [PassiveAggressiveRegressor][] for regression tasks.
+
+ Read more in sklearn's [documentation][padocs].
+
+ See Also
+ --------
+ atom.models:MultiLayerPerceptron
+ atom.models:Perceptron
+ atom.models:StochasticGradientDescent
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(models="PA", metric="f1", verbose=2)
+ ```
+
+ """
+
+ acronym = "PA"
+ needs_scaling = True
+ accepts_sparse = True
+ native_multilabel = False
+ native_multioutput = False
+ has_validation = "max_iter"
+ supports_engines = ["sklearn"]
+
+ _module = "linear_model"
+ _estimators = CustomDict(
+ {"class": "PassiveAggressiveClassifier", "reg": "PassiveAggressiveRegressor"}
+ )
+
+ def _get_distributions(self) -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ if self.goal == "class":
+ loss = ["hinge", "squared_hinge"]
+ else:
+ loss = ["epsilon_insensitive", "squared_epsilon_insensitive"]
+
+ return CustomDict(
+ C=Float(1e-3, 100, log=True),
+ max_iter=Int(500, 1500, step=50),
+ loss=Cat(loss),
+ average=Cat([True, False]),
+ )
+
+
+class Perceptron(ClassRegModel):
+ """Linear Perceptron classification.
+
+ The Perceptron is a simple classification algorithm suitable for
+ large scale learning. By default:
+
+ * It does not require a learning rate.
+ * It is not regularized (penalized).
+ * It updates its model only on mistakes.
+
+ The last characteristic implies that the Perceptron is slightly
+ faster to train than [StochasticGradientDescent][] with the hinge
+ loss and that the resulting models are sparser.
+
+ Corresponding estimators are:
+
+ - [Perceptron][percclassifier] for classification tasks.
+
+ Read more in sklearn's [documentation][percdocs].
+
+ See Also
+ --------
+ atom.models:MultiLayerPerceptron
+ atom.models:PassiveAggressive
+ atom.models:StochasticGradientDescent
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(models="Perc", metric="f1", verbose=2)
+ ```
+
+ """
+
+ acronym = "Perc"
+ needs_scaling = True
+ accepts_sparse = False
+ native_multilabel = False
+ native_multioutput = False
+ has_validation = "max_iter"
+ supports_engines = ["sklearn"]
+
+ _module = "linear_model"
+ _estimators = CustomDict({"class": "Perceptron"})
+
+ def _get_parameters(self, trial: Trial) -> CustomDict:
+ """Get the trial's hyperparameters.
+
+ Parameters
+ ----------
+ trial: [Trial][]
+ Current trial.
+
+ Returns
+ -------
+ CustomDict
+ Trial's hyperparameters.
+
+ """
+ params = super()._get_parameters(trial)
+
+ if self._get_param("penalty", params) != "elasticnet":
+ params.pop("l1_ratio")
+
+ return params
+
+ @staticmethod
+ def _get_distributions() -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ return CustomDict(
+ penalty=Cat([None, "l2", "l1", "elasticnet"]),
+ alpha=Float(1e-4, 10, log=True),
+ l1_ratio=Float(0.1, 0.9, step=0.1),
+ max_iter=Int(500, 1500, step=50),
+ eta0=Float(1e-2, 10, log=True),
+ )
+
+
+class QuadraticDiscriminantAnalysis(ClassRegModel):
+ """Quadratic Discriminant Analysis.
+
+ Quadratic Discriminant Analysis is a classifier with a quadratic
+ decision boundary, generated by fitting class conditional densities
+    to the data and using Bayes’ rule. The model fits a Gaussian
+    density to each class, without assuming that all classes share
+    the same covariance matrix.
+
+ Corresponding estimators are:
+
+ - [QuadraticDiscriminantAnalysis][qdaclassifier] for classification tasks.
+
+    Read more in sklearn's [documentation][qdadocs].
+
+ See Also
+ --------
+ atom.models:LinearDiscriminantAnalysis
+ atom.models:LogisticRegression
+ atom.models:RadiusNearestNeighbors
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(models="QDA", metric="f1", verbose=2)
+ ```
+
+ """
+
+ acronym = "QDA"
+ needs_scaling = False
+ accepts_sparse = False
+ native_multilabel = False
+ native_multioutput = False
+ has_validation = None
+ supports_engines = ["sklearn"]
+
+ _module = "discriminant_analysis"
+ _estimators = CustomDict({"class": "QuadraticDiscriminantAnalysis"})
+
+ @staticmethod
+ def _get_distributions() -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ return CustomDict(reg_param=Float(0, 1.0, step=0.1))
+
+
+class RadiusNearestNeighbors(ClassRegModel):
+ """Radius Nearest Neighbors.
+
+ Radius Nearest Neighbors implements the nearest neighbors vote,
+ where the neighbors are selected from within a given radius. For
+ regression, the target is predicted by local interpolation of the
+ targets associated of the nearest neighbors in the training set.
+
+ !!! warning
+ * The `radius` parameter should be tuned to the data at hand or
+ the model will perform poorly.
+ * If outliers are detected, the estimator raises an exception
+ unless `est_params={"outlier_label": "most_frequent"}` is used.
+
+ Corresponding estimators are:
+
+ - [RadiusNeighborsClassifier][] for classification tasks.
+ - [RadiusNeighborsRegressor][] for regression tasks.
+
+ Read more in sklearn's [documentation][knndocs].
+
+ See Also
+ --------
+ atom.models:KNearestNeighbors
+ atom.models:LinearDiscriminantAnalysis
+ atom.models:QuadraticDiscriminantAnalysis
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(
+ models="RNN",
+ metric="f1",
+ est_params={"outlier_label": "most_frequent"},
+ verbose=2,
+ )
+ ```
+
+ """
+
+ acronym = "RNN"
+ needs_scaling = True
+ accepts_sparse = True
+ native_multilabel = True
+ native_multioutput = True
+ has_validation = None
+ supports_engines = ["sklearn"]
+
+ _module = "neighbors"
+ _estimators = CustomDict(
+ {"class": "RadiusNeighborsClassifier", "reg": "RadiusNeighborsRegressor"}
+ )
+
+ @staticmethod
+ def _get_distributions() -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ return CustomDict(
+ radius=Float(1e-2, 100),
+ weights=Cat(["uniform", "distance"]),
+ algorithm=Cat(["auto", "ball_tree", "kd_tree", "brute"]),
+ leaf_size=Int(20, 40),
+ p=Int(1, 2),
+ )
+
+
+class RandomForest(ClassRegModel):
+ """Random Forest.
+
+ Random forests are an ensemble learning method that operate by
+ constructing a multitude of decision trees at training time and
+ outputting the class that is the mode of the classes
+ (classification) or mean prediction (regression) of the individual
+ trees. Random forests correct for decision trees' habit of
+ overfitting to their training set.
+
+ Corresponding estimators are:
+
+ - [RandomForestClassifier][] for classification tasks.
+ - [RandomForestRegressor][] for regression tasks.
+
+    Read more in sklearn's [documentation][rfdocs].
+
+ !!! warning
+ cuML's implementation of [RandomForestClassifier][cumlrf] only
+ supports predictions on dtype `float32`. Convert all dtypes
+ before calling atom's [run][atomclassifier-run] method to avoid
+ exceptions.
+
+ See Also
+ --------
+ atom.models:DecisionTree
+ atom.models:ExtraTrees
+ atom.models:HistGradientBoosting
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(models="RF", metric="f1", verbose=2)
+ ```
+
+ """
+
+ acronym = "RF"
+ needs_scaling = False
+ accepts_sparse = True
+ native_multilabel = True
+ native_multioutput = True
+ has_validation = None
+ supports_engines = ["sklearn", "sklearnex", "cuml"]
+
+ _module = "ensemble"
+ _estimators = CustomDict(
+ {"class": "RandomForestClassifier", "reg": "RandomForestRegressor"}
+ )
+
+ def _get_parameters(self, trial: Trial) -> CustomDict:
+ """Get the trial's hyperparameters.
+
+ Parameters
+ ----------
+ trial: [Trial][]
+ Current trial.
+
+ Returns
+ -------
+ CustomDict
+ Trial's hyperparameters.
+
+ """
+ params = super()._get_parameters(trial)
+
+ if not self._get_param("bootstrap", params):
+ params.pop("max_samples")
+
+ return params
+
+ def _get_distributions(self) -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ if self.goal == "class":
+ criterion = ["gini", "entropy"]
+ else:
+ if self.engine.get("estimator") == "cuml":
+ criterion = ["mse", "poisson", "gamma", "inverse_gaussian"]
+ else:
+ criterion = ["squared_error", "absolute_error", "poisson"]
+
+ dist = CustomDict(
+ n_estimators=Int(10, 500, step=10),
+ criterion=Cat(criterion),
+ max_depth=Cat([None, *range(1, 17)]),
+ min_samples_split=Int(2, 20),
+ min_samples_leaf=Int(1, 20),
+ max_features=Cat([None, "sqrt", "log2", 0.5, 0.6, 0.7, 0.8, 0.9]),
+ bootstrap=Cat([True, False]),
+ max_samples=Cat([None, 0.5, 0.6, 0.7, 0.8, 0.9]),
+ ccp_alpha=Float(0, 0.035, step=0.005),
+ )
+
+ if self.engine.get("estimator") == "sklearnex":
+ dist.pop("criterion")
+ dist.pop("ccp_alpha")
+ elif self.engine.get("estimator") == "cuml":
+ dist.replace_key("criterion", "split_criterion")
+ dist["max_depth"] = Int(1, 17)
+ dist["max_features"] = Cat(["sqrt", "log2", 0.5, 0.6, 0.7, 0.8, 0.9])
+ dist["max_samples"] = Float(0.5, 0.9, step=0.1)
+ dist.pop("ccp_alpha")
+
+ return dist
+
+
+class Ridge(ClassRegModel):
+ """Linear least squares with l2 regularization.
+
+ If classifier, it first converts the target values into {-1, 1}
+ and then treats the problem as a regression task.
+
+ Corresponding estimators are:
+
+ - [RidgeClassifier][] for classification tasks.
+ - [Ridge][ridgeregressor] for regression tasks.
+
+ Read more in sklearn's [documentation][ridgedocs].
+
+ !!! warning
+ Engines `sklearnex` and `cuml` are only available for regression
+ tasks.
+
+ See Also
+ --------
+ atom.models:BayesianRidge
+ atom.models:ElasticNet
+ atom.models:Lasso
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMRegressor
+ from sklearn.datasets import fetch_california_housing
+
+ X, y = fetch_california_housing(return_X_y=True)
+
+ atom = ATOMRegressor(X, y, random_state=1)
+ atom.run(models="Ridge", metric="r2", verbose=2)
+ ```
+
+ """
+
+ acronym = "Ridge"
+ needs_scaling = True
+ accepts_sparse = True
+ native_multilabel = True
+ native_multioutput = False
+ has_validation = None
+ supports_engines = ["sklearn", "sklearnex", "cuml"]
+
+ _module = "linear_model"
+ _estimators = CustomDict({"class": "RidgeClassifier", "reg": "Ridge"})
+
+ def _get_distributions(self) -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ dist = CustomDict(
+ alpha=Float(1e-3, 10, log=True),
+ solver=Cat(["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]),
+ )
+
+ if self.goal == "reg":
+ if self.engine.get("estimator") == "sklearnex":
+ dist.pop("solver") # Only supports 'auto'
+ elif self.engine.get("estimator") == "cuml":
+ dist["solver"] = Cat(["eig", "svd", "cd"])
+
+ return dist
+
+
+class StochasticGradientDescent(ClassRegModel):
+ """Stochastic Gradient Descent.
+
+ Stochastic Gradient Descent is a simple yet very efficient approach
+ to fitting linear classifiers and regressors under convex loss
+ functions. Even though SGD has been around in the machine learning
+ community for a long time, it has received a considerable amount of
+ attention just recently in the context of large-scale learning.
+
+ Corresponding estimators are:
+
+ - [SGDClassifier][] for classification tasks.
+ - [SGDRegressor][] for regression tasks.
+
+ Read more in sklearn's [documentation][sgddocs].
+
+ See Also
+ --------
+ atom.models:MultiLayerPerceptron
+ atom.models:PassiveAggressive
+ atom.models:SupportVectorMachine
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(models="SGD", metric="f1", verbose=2)
+ ```
+
+ """
+
+ acronym = "SGD"
+ needs_scaling = True
+ accepts_sparse = True
+ native_multilabel = False
+ native_multioutput = False
+ has_validation = "max_iter"
+ supports_engines = ["sklearn"]
+
+ _module = "linear_model"
+ _estimators = CustomDict({"class": "SGDClassifier", "reg": "SGDRegressor"})
+
+ def _get_parameters(self, trial: Trial) -> CustomDict:
+ """Get the trial's hyperparameters.
+
+ Parameters
+ ----------
+ trial: [Trial][]
+ Current trial.
+
+ Returns
+ -------
+ CustomDict
+ Trial's hyperparameters.
+
+ """
+ params = super()._get_parameters(trial)
+
+ if self._get_param("penalty", params) != "elasticnet":
+ params.pop("l1_ratio")
+
+ if self._get_param("learning_rate", params) == "optimal":
+ params.pop("eta0")
+
+ return params
+
+ def _get_distributions(self) -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ loss = [
+ "hinge",
+ "log_loss",
+ "modified_huber",
+ "squared_hinge",
+ "perceptron",
+ "squared_error",
+ "huber",
+ "epsilon_insensitive",
+ "squared_epsilon_insensitive",
+ ]
+
+ return CustomDict(
+ loss=Cat(loss if self.goal == "class" else loss[-4:]),
+ penalty=Cat([None, "l1", "l2", "elasticnet"]),
+ alpha=Float(1e-4, 1.0, log=True),
+ l1_ratio=Float(0.1, 0.9, step=0.1),
+ max_iter=Int(500, 1500, step=50),
+ epsilon=Float(1e-4, 1.0, log=True),
+ learning_rate=Cat(["constant", "invscaling", "optimal", "adaptive"]),
+ eta0=Float(1e-2, 10, log=True),
+ power_t=Float(0.1, 0.9, step=0.1),
+ average=Cat([True, False]),
+ )
+
+
+class SupportVectorMachine(ClassRegModel):
+ """Support Vector Machine.
+
+ The implementation of the Support Vector Machine is based on libsvm.
+ The fit time scales at least quadratically with the number of
+ samples and may be impractical beyond tens of thousands of samples.
+ For large datasets consider using a [LinearSVM][] or a
+ [StochasticGradientDescent][] model instead.
+
+ Corresponding estimators are:
+
+ - [SVC][] for classification tasks.
+    - [SVR][] for regression tasks.
+
+ Read more in sklearn's [documentation][svmdocs].
+
+ See Also
+ --------
+ atom.models:LinearSVM
+ atom.models:MultiLayerPerceptron
+ atom.models:StochasticGradientDescent
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(models="SVM", metric="f1", verbose=2)
+ ```
+
+ """
+
+ acronym = "SVM"
+ needs_scaling = True
+ accepts_sparse = True
+ native_multilabel = False
+ native_multioutput = False
+ has_validation = None
+ supports_engines = ["sklearn", "sklearnex", "cuml"]
+
+ _module = "svm"
+ _estimators = CustomDict({"class": "SVC", "reg": "SVR"})
+
+ def _get_parameters(self, trial: Trial) -> CustomDict:
+ """Get the trial's hyperparameters.
+
+ Parameters
+ ----------
+ trial: [Trial][]
+ Current trial.
+
+ Returns
+ -------
+ CustomDict
+ Trial's hyperparameters.
+
+ """
+ params = super()._get_parameters(trial)
+
+ if self.goal == "class":
+ params.pop("epsilon")
+
+ kernel = self._get_param("kernel", params)
+ if kernel == "poly":
+ params.replace_value("gamma", "scale") # Crashes in combination with "auto"
+ else:
+ params.pop("degree")
+
+ if kernel not in ("rbf", "poly", "sigmoid"):
+ params.pop("gamma")
+
+ if kernel not in ("poly", "sigmoid"):
+ params.pop("coef0")
+
+ return params
+
+ def _get_est(self, **params) -> PREDICTOR:
+ """Get the model's estimator with unpacked parameters.
+
+ Returns
+ -------
+ Predictor
+ Estimator instance.
+
+ """
+ if self.engine.get("estimator") == "cuml" and self.goal == "class":
+ return self._est_class(
+ probability=params.pop("probability", True),
+ random_state=params.pop("random_state", self.random_state),
+ **params)
+ else:
+ return super()._get_est(**params)
+
+ def _get_distributions(self) -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ dist = CustomDict(
+ C=Float(1e-3, 100, log=True),
+ kernel=Cat(["linear", "poly", "rbf", "sigmoid"]),
+ degree=Int(2, 5),
+ gamma=Cat(["scale", "auto"]),
+ coef0=Float(-1.0, 1.0),
+ epsilon=Float(1e-3, 100, log=True),
+ shrinking=Cat([True, False]),
+ )
+
+ if self.engine.get("estimator") == "cuml":
+ dist.pop("epsilon")
+ dist.pop("shrinking")
+
+ return dist
+
+
+class XGBoost(ClassRegModel):
+ """Extreme Gradient Boosting.
+
+ XGBoost is an optimized distributed gradient boosting model
+ designed to be highly efficient, flexible and portable. XGBoost
+ provides a parallel tree boosting that solve many data science
+ problems in a fast and accurate way.
+
+ Corresponding estimators are:
+
+ - [XGBClassifier][] for classification tasks.
+ - [XGBRegressor][] for regression tasks.
+
+ Read more in XGBoost's [documentation][xgbdocs].
+
+ See Also
+ --------
+ atom.models:CatBoost
+ atom.models:GradientBoostingMachine
+ atom.models:LightGBM
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(models="XGB", metric="f1", verbose=2)
+ ```
+
+ """
+
+ acronym = "XGB"
+ needs_scaling = True
+ accepts_sparse = True
+ native_multilabel = False
+ native_multioutput = False
+ has_validation = "n_estimators"
+ supports_engines = ["xgboost"]
+
+ _module = "xgboost"
+ _estimators = CustomDict({"class": "XGBClassifier", "reg": "XGBRegressor"})
+
+ def _get_est(self, **params) -> PREDICTOR:
+ """Get the model's estimator with unpacked parameters.
+
+ Returns
+ -------
+ Predictor
+ Estimator instance.
+
+ """
+ eval_metric = None
+ if getattr(self, "_metric", None):
+ eval_metric = XGBMetric(self._metric[0], task=self.task)
+
+ return self._est_class(
+ eval_metric=params.pop("eval_metric", eval_metric),
+ n_jobs=params.pop("n_jobs", self.n_jobs),
+ tree_method=params.pop("tree_method", "gpu_hist" if self._gpu else None),
+ gpu_id=self._device_id,
+ verbosity=params.pop("verbosity", 0),
+ random_state=params.pop("random_state", self.random_state),
+ **params,
+ )
+
+ def _fit_estimator(
+ self,
+ estimator: PREDICTOR,
+ data: tuple[DATAFRAME, PANDAS],
+ est_params_fit: dict,
+ validation: tuple[DATAFRAME, PANDAS] | None = None,
+ trial: Trial | None = None,
+ ):
+ """Fit the estimator and perform in-training validation.
+
+ Parameters
+ ----------
+ estimator: Predictor
+ Instance to fit.
+
+ data: tuple
+ Training data of the form (X, y).
+
+ est_params_fit: dict
+ Additional parameters for the estimator's fit method.
+
+ validation: tuple or None
+ Validation data of the form (X, y). If None, no validation
+ is performed.
+
+ trial: [Trial][] or None
+ Active trial (during hyperparameter tuning).
+
+ Returns
+ -------
+ Predictor
+ Fitted instance.
+
+ """
+ m = self._metric[0].name
+ params = est_params_fit.copy()
+
+ callbacks = params.pop("callbacks", [])
+ if trial and len(self._metric) == 1:
+ callbacks.append(XGBoostPruningCallback(trial, f"validation_1-{m}"))
+
+ try:
+ estimator.set_params(callbacks=callbacks)
+ estimator.fit(
+ *data,
+ eval_set=[data, validation] if validation else None,
+ verbose=params.get("verbose", False),
+ **params,
+ )
+ except TrialPruned as ex:
+ # Add the pruned step to the output
+ step = str(ex).split(" ")[-1][:-1]
+ steps = estimator.get_params()[self.has_validation]
+ trial.params[self.has_validation] = f"{step}/{steps}"
+
+ trial.set_user_attr("estimator", estimator)
+ raise ex
+
+ if validation:
+ # Create evals attribute with train and validation scores
+ # Negative because minimizes the function
+ results = estimator.evals_result()
+ self._evals[f"{m}_train"] = np.negative(results["validation_0"][m])
+ self._evals[f"{m}_test"] = np.negative(results["validation_1"][m])
+
+ return estimator
+
+ @staticmethod
+ def _get_distributions() -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ return CustomDict(
+ n_estimators=Int(20, 500, step=10),
+ learning_rate=Float(0.01, 1.0, log=True),
+ max_depth=Int(1, 20),
+ gamma=Float(0, 1.0),
+ min_child_weight=Int(1, 10),
+ subsample=Float(0.5, 1.0, step=0.1),
+ colsample_bytree=Float(0.4, 1.0, step=0.1),
+ reg_alpha=Float(1e-4, 100, log=True),
+ reg_lambda=Float(1e-4, 100, log=True),
+ )
diff --git a/atom/models/ensembles.py b/atom/models/ensembles.py
new file mode 100644
index 000000000..39d890983
--- /dev/null
+++ b/atom/models/ensembles.py
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+
+"""
+Automated Tool for Optimized Modelling (ATOM)
+Author: Mavs
+Description: Module containing all ensemble models.
+
+"""
+
+from __future__ import annotations
+
+from atom.basemodel import ClassRegModel
+from atom.pipeline import Pipeline
+from atom.utils.types import PREDICTOR
+from atom.utils.utils import ClassMap, CustomDict, sign
+
+
+class Stacking(ClassRegModel):
+ """Stacking ensemble.
+
+ Parameters
+ ----------
+ models: ClassMap
+ Models from which to build the ensemble.
+
+ **kwargs
+ Additional keyword arguments for the estimator.
+
+ """
+
+ acronym = "Stack"
+ needs_scaling = False
+ has_validation = None
+ native_multilabel = False
+ native_multioutput = False
+ supports_engines = []
+
+ _module = "atom.ensembles"
+ _estimators = CustomDict({"class": "StackingClassifier", "reg": "StackingRegressor"})
+
+ def __init__(self, models: ClassMap, **kwargs):
+ self._models = models
+ kw_model = {k: v for k, v in kwargs.items() if k in sign(ClassRegModel.__init__)}
+ super().__init__(**kw_model)
+ self._est_params = {k: v for k, v in kwargs.items() if k not in kw_model}
+
+ def _get_est(self, **params) -> PREDICTOR:
+ """Get the model's estimator with unpacked parameters.
+
+ Returns
+ -------
+ Predictor
+ Estimator instance.
+
+ """
+ estimators = []
+ for m in self._models:
+ if m.scaler:
+ name = f"pipeline_{m.name}"
+ est = Pipeline([("scaler", m.scaler), (m.name, m.estimator)])
+ else:
+ name = m.name
+ est = m.estimator
+
+ estimators.append((name, est))
+
+ return self._est_class(
+ estimators=estimators,
+ n_jobs=params.pop("n_jobs", self.n_jobs),
+ **params,
+ )
+
+
+class Voting(ClassRegModel):
+ """Voting ensemble.
+
+ Parameters
+ ----------
+ models: ClassMap
+ Models from which to build the ensemble.
+
+ **kwargs
+ Additional keyword arguments for the estimator.
+
+ """
+
+ acronym = "Vote"
+ needs_scaling = False
+ has_validation = None
+ native_multilabel = False
+ native_multioutput = False
+ supports_engines = []
+
+ _module = "atom.ensembles"
+ _estimators = CustomDict({"class": "VotingClassifier", "reg": "VotingRegressor"})
+
+ def __init__(self, models: ClassMap, **kwargs):
+ self._models = models
+ kw_model = {k: v for k, v in kwargs.items() if k in sign(ClassRegModel.__init__)}
+ super().__init__(**kw_model)
+ self._est_params = {k: v for k, v in kwargs.items() if k not in kw_model}
+
+ if self._est_params.get("voting") == "soft":
+ for m in self._models:
+ if not hasattr(m.estimator, "predict_proba"):
+ raise ValueError(
+ "Invalid value for the voting parameter. If "
+ "'soft', all models in the ensemble should have "
+ f"a predict_proba method, got {m._fullname}."
+ )
+
+ def _get_est(self, **params) -> PREDICTOR:
+ """Get the model's estimator with unpacked parameters.
+
+ Returns
+ -------
+ Predictor
+ Estimator instance.
+
+ """
+ estimators = []
+ for m in self._models:
+ if m.scaler:
+ name = f"pipeline_{m.name}"
+ est = Pipeline([("scaler", m.scaler), (m.name, m.estimator)])
+ else:
+ name = m.name
+ est = m.estimator
+
+ estimators.append((name, est))
+
+ return self._est_class(
+ estimators=estimators,
+ n_jobs=params.pop("n_jobs", self.n_jobs),
+ **params,
+ )
diff --git a/atom/models/ts.py b/atom/models/ts.py
new file mode 100644
index 000000000..b3680a95a
--- /dev/null
+++ b/atom/models/ts.py
@@ -0,0 +1,535 @@
+# -*- coding: utf-8 -*-
+
+"""
+Automated Tool for Optimized Modelling (ATOM)
+Author: Mavs
+Description: Module containing all time series models.
+
+"""
+
+from __future__ import annotations
+
+from optuna.distributions import CategoricalDistribution as Cat
+from optuna.distributions import IntDistribution as Int
+from optuna.trial import Trial
+
+from atom.basemodel import ForecastModel
+from atom.utils.utils import CustomDict
+
+
+class ARIMA(ForecastModel):
+ """Autoregressive Integrated Moving Average Model.
+
+    Seasonal ARIMA models and exogenous input are supported, hence this
+ estimator is capable of fitting SARIMA, ARIMAX, and SARIMAX.
+
+    An ARIMA model is a generalization of an autoregressive moving
+ average (ARMA) model, and is fitted to time-series data in an effort
+ to forecast future points. ARIMA models can be especially
+ efficacious in cases where data shows evidence of non-stationarity.
+
+ The "AR" part of ARIMA indicates that the evolving variable of
+ interest is regressed on its own lagged (i.e., prior observed)
+ values. The "MA" part indicates that the regression error is
+ actually a linear combination of error terms whose values occurred
+ contemporaneously and at various times in the past. The "I" (for
+ "integrated") indicates that the data values have been replaced with
+ the difference between their values and the previous values (and this
+ differencing process may have been performed more than once).
+
+ Corresponding estimators are:
+
+ - [ARIMA][arimaclass] for forecasting tasks.
+
+ !!! warning
+ ARIMA often runs into numerical errors when optimizing the
+ hyperparameters. Possible solutions are:
+
+ - Use the [AutoARIMA][] model instead.
+ - Use [`est_params`][directforecaster-est_params] to specify the
+ orders manually, e.g. `#!python atom.run("arima", n_trials=5,
+ est_params={"order": (1, 1, 0)})`.
+ - Use the `catch` parameter in [`ht_params`][directforecaster-ht_params]
+ to avoid raising every exception, e.g. `#!python atom.run("arima",
+ n_trials=5, ht_params={"catch": (Exception,)})`.
+
+ See Also
+ --------
+ atom.models:AutoARIMA
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMForecaster
+ from sktime.datasets import load_longley
+
+ _, X = load_longley()
+
+ atom = ATOMForecaster(X)
+ atom.run(models="ARIMA", verbose=2)
+ ```
+
+ """
+
+ acronym = "ARIMA"
+ needs_scaling = False
+ accepts_sparse = False
+ native_multilabel = False
+ native_multioutput = True
+ has_validation = None
+ supports_engines = ["sktime"]
+
+ _module = "sktime.forecasting.arima"
+ _estimators = CustomDict({"fc": "ARIMA"})
+
+ _order = ("p", "d", "q")
+ _sorder = ("Ps", "Ds", "Qs", "S")
+
+ def _get_parameters(self, trial: Trial) -> CustomDict:
+ """Get the trial's hyperparameters.
+
+ Parameters
+ ----------
+ trial: [Trial][]
+ Current trial.
+
+ Returns
+ -------
+ CustomDict
+ Trial's hyperparameters.
+
+ """
+ params = super()._get_parameters(trial)
+
+ # If no seasonal periodicity, set seasonal components to zero
+ if self._get_param("S", params) == 0:
+ for p in self._sorder:
+ params.replace_value(p, 0)
+
+ return params
+
+ def _trial_to_est(self, params: CustomDict) -> CustomDict:
+ """Convert trial's hyperparameters to parameters for the estimator.
+
+ Parameters
+ ----------
+ params: CustomDict
+ Trial's hyperparameters.
+
+ Returns
+ -------
+ CustomDict
+ Estimator's hyperparameters.
+
+ """
+ params = super()._trial_to_est(params)
+
+ # Convert params to hyperparameters order and seasonal_order
+ if all(p in params for p in self._sorder):
+ params.insert(0, "seasonal_order", tuple(params.pop(p) for p in self._sorder))
+ if all(p in params for p in self._order):
+ params.insert(0, "order", tuple(params.pop(p) for p in self._order))
+
+ return params
+
+ def _get_distributions(self) -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ methods = ["newton", "nm", "bfgs", "lbfgs", "powell", "cg", "ncg", "basinhopping"]
+
+ dist = CustomDict(
+ p=Int(0, 2),
+ d=Int(0, 1),
+ q=Int(0, 2),
+ Ps=Int(0, 2),
+ Ds=Int(0, 1),
+ Qs=Int(0, 2),
+ S=Cat([0, 4, 6, 7, 12]),
+ method=Cat(methods),
+ maxiter=Int(50, 200, step=10),
+ with_intercept=Cat([True, False]),
+ )
+
+ # Drop order and seasonal_order params if specified by user
+ if "order" in self._est_params:
+ for p in self._order:
+ dist.pop(p)
+ if "seasonal_order" in self._est_params:
+ for p in self._sorder:
+ dist.pop(p)
+
+ return dist
+
+
+class AutoARIMA(ForecastModel):
+ """Automatic Autoregressive Integrated Moving Average Model.
+
+ [ARIMA][] implementation that includes automated fitting of
+ (S)ARIMA(X) hyperparameters (p, d, q, P, D, Q). The AutoARIMA
+ algorithm seeks to identify the most optimal parameters for an
+ ARIMA model, settling on a single fitted ARIMA model. This process
+ is based on the commonly-used R function.
+
+ AutoARIMA works by conducting differencing tests (i.e.,
+ Kwiatkowski–Phillips–Schmidt–Shin, Augmented Dickey-Fuller or
+ Phillips–Perron) to determine the order of differencing, d, and
+ then fitting models within defined ranges. AutoARIMA also seeks
+ to identify the optimal P and Q hyperparameters after conducting
+    the Canova-Hansen test to determine the optimal order of seasonal
+ differencing.
+
+ Note that due to stationarity issues, AutoARIMA might not find a
+ suitable model that will converge. If this is the case, a ValueError
+ is thrown suggesting stationarity-inducing measures be taken prior
+ to re-fitting or that a new range of order values be selected.
+
+ Corresponding estimators are:
+
+ - [AutoARIMA][autoarimaclass] for forecasting tasks.
+
+ See Also
+ --------
+ atom.models:ARIMA
+ atom.models:ETS
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMForecaster
+ from sktime.datasets import load_longley
+
+ _, X = load_longley()
+
+ atom = ATOMForecaster(X, random_state=1)
+ atom.run(models="autoarima", verbose=2)
+ ```
+
+ """
+
+ acronym = "AutoARIMA"
+ needs_scaling = False
+ accepts_sparse = False
+ native_multilabel = False
+ native_multioutput = True
+ has_validation = None
+ supports_engines = ["sktime"]
+
+ _module = "sktime.forecasting.arima"
+ _estimators = CustomDict({"fc": "AutoARIMA"})
+
+ @staticmethod
+ def _get_distributions() -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ methods = ["newton", "nm", "bfgs", "lbfgs", "powell", "cg", "ncg", "basinhopping"]
+
+ return CustomDict(
+ method=Cat(methods),
+ maxiter=Int(50, 200, step=10),
+ with_intercept=Cat([True, False]),
+ )
+
+
+class ExponentialSmoothing(ForecastModel):
+ """Exponential Smoothing forecaster.
+
+ Holt-Winters exponential smoothing forecaster. The default settings
+ use simple exponential smoothing, without trend and seasonality
+ components.
+
+ Corresponding estimators are:
+
+ - [ExponentialSmoothing][esclass] for forecasting tasks.
+
+ See Also
+ --------
+ atom.models:ARIMA
+ atom.models:ETS
+ atom.models:PolynomialTrend
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMForecaster
+ from sktime.datasets import load_airline
+
+ y = load_airline()
+
+ atom = ATOMForecaster(y, random_state=1)
+ atom.run(models="ES", verbose=2)
+ ```
+
+ """
+
+ acronym = "ES"
+ needs_scaling = False
+ accepts_sparse = False
+ native_multilabel = False
+ native_multioutput = True
+ has_validation = None
+ supports_engines = ["sktime"]
+
+ _module = "sktime.forecasting.exp_smoothing"
+ _estimators = CustomDict({"fc": "ExponentialSmoothing"})
+
+ def _get_parameters(self, trial: Trial) -> CustomDict:
+ """Get the trial's hyperparameters.
+
+ Parameters
+ ----------
+ trial: [Trial][]
+ Current trial.
+
+ Returns
+ -------
+ CustomDict
+ Trial's hyperparameters.
+
+ """
+ params = super()._get_parameters(trial)
+
+ if self._get_param("trend", params) is None:
+ params.pop("damped_trend")
+
+ if self._get_param("sp", params) is None:
+ params.pop("seasonal")
+
+ return params
+
+ @staticmethod
+ def _get_distributions() -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ methods = ["L-BFGS-B", "TNC", "SLSQP", "Powell", "trust-constr", "bh", "ls"]
+
+ return CustomDict(
+ trend=Cat(["add", "mul", None]),
+ damped_trend=Cat([True, False]),
+ seasonal=Cat(["add", "mul", None]),
+ sp=Cat([4, 6, 7, 12, None]),
+ use_boxcox=Cat([True, False]),
+ initialization_method=Cat(["estimated", "heuristic"]),
+ method=Cat(methods),
+ )
+
+
+class ETS(ForecastModel):
+ """ETS model with automatic fitting capabilities.
+
+ The ETS models are a family of time series models with an
+    underlying state space model consisting of a level component (L),
+ a trend component (T), a seasonal component (S), and an error
+ term (E).
+
+ Corresponding estimators are:
+
+ - [AutoETS][] for forecasting tasks.
+
+ See Also
+ --------
+ atom.models:ARIMA
+ atom.models:ExponentialSmoothing
+ atom.models:PolynomialTrend
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMForecaster
+ from sktime.datasets import load_airline
+
+ y = load_airline()
+
+ atom = ATOMForecaster(y, random_state=1)
+ atom.run(models="ETS", verbose=2)
+
+ ```
+
+ """
+
+ acronym = "ETS"
+ needs_scaling = False
+ accepts_sparse = False
+ native_multilabel = False
+ native_multioutput = True
+ has_validation = None
+ supports_engines = ["sktime"]
+
+ _module = "sktime.forecasting.ets"
+ _estimators = CustomDict({"fc": "AutoETS"})
+
+ def _get_parameters(self, trial: Trial) -> CustomDict:
+ """Get the trial's hyperparameters.
+
+ Parameters
+ ----------
+ trial: [Trial][]
+ Current trial.
+
+ Returns
+ -------
+ CustomDict
+ Trial's hyperparameters.
+
+ """
+ params = super()._get_parameters(trial)
+
+        # Drop the seasonal component when there is no seasonal periodicity
+ if self._get_param("sp", params) == 1:
+ params.pop("seasonal")
+
+ return params
+
+ @staticmethod
+ def _get_distributions() -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ return CustomDict(
+ error=Cat(["add", "mul"]),
+ trend=Cat(["add", "mul", None]),
+ damped_trend=Cat([True, False]),
+ seasonal=Cat(["add", "mul", None]),
+ sp=Cat([1, 4, 6, 7, 12]),
+ initialization_method=Cat(["estimated", "heuristic"]),
+ maxiter=Int(500, 2000, step=100),
+ auto=Cat([True, False]),
+ information_criterion=Cat(["aic", "bic", "aicc"]),
+ )
+
+
+class NaiveForecaster(ForecastModel):
+ """Naive Forecaster.
+
+ NaiveForecaster is a dummy forecaster that makes forecasts using
+ simple strategies based on naive assumptions about past trends
+ continuing. When used in [multivariate][] tasks, each column is
+ forecasted with the same strategy.
+
+ Corresponding estimators are:
+
+ - [NaiveForecaster][naiveforecasterclass] for forecasting tasks.
+
+ See Also
+ --------
+ atom.models:ExponentialSmoothing
+ atom.models:Dummy
+ atom.models:PolynomialTrend
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMForecaster
+ from sktime.datasets import load_airline
+
+ y = load_airline()
+
+ atom = ATOMForecaster(y, random_state=1)
+ atom.run(models="NF", verbose=2)
+
+ ```
+
+ """
+
+ acronym = "NF"
+ needs_scaling = False
+ accepts_sparse = False
+ native_multilabel = False
+ native_multioutput = True
+ has_validation = None
+ supports_engines = ["sktime"]
+
+ _module = "sktime.forecasting.naive"
+ _estimators = CustomDict({"fc": "NaiveForecaster"})
+
+ @staticmethod
+ def _get_distributions() -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ return CustomDict(strategy=Cat(["last", "mean", "drift"]))
+
+
+class PolynomialTrend(ForecastModel):
+ """Polynomial Trend forecaster.
+
+ Forecast time series data with a polynomial trend, using a sklearn
+ [LinearRegression][] class to regress values of time series on
+ index, after extraction of polynomial features.
+
+ Corresponding estimators are:
+
+ - [PolynomialTrendForecaster][] for forecasting tasks.
+
+ See Also
+ --------
+ atom.models:ARIMA
+ atom.models:ETS
+ atom.models:NaiveForecaster
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMForecaster
+ from sktime.datasets import load_airline
+
+ y = load_airline()
+
+ atom = ATOMForecaster(y, random_state=1)
+ atom.run(models="PT", verbose=2)
+ ```
+
+ """
+
+ acronym = "PT"
+ needs_scaling = False
+ accepts_sparse = False
+ native_multilabel = False
+ native_multioutput = True
+ has_validation = None
+ supports_engines = ["sktime"]
+
+ _module = "sktime.forecasting.trend"
+ _estimators = CustomDict({"fc": "PolynomialTrendForecaster"})
+
+ @staticmethod
+ def _get_distributions() -> CustomDict:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ CustomDict
+ Hyperparameter distributions.
+
+ """
+ return CustomDict(
+ degree=Int(1, 5),
+ with_intercept=Cat([True, False]),
+ )
diff --git a/atom/nlp.py b/atom/nlp.py
index ee3f79a07..5d43a9e14 100644
--- a/atom/nlp.py
+++ b/atom/nlp.py
@@ -949,7 +949,7 @@ class Vectorizer(BaseEstimator, TransformerMixin, BaseTransformer):
def __init__(
self,
- strategy: str = "bow",
+ strategy: Literal["bow", "tfidf", "hashing"] = "bow",
*,
return_sparse: BOOL = True,
device: str = "cpu",
@@ -1001,17 +1001,11 @@ def fit(self, X: FEATURES, y: TARGET | None = None) -> Vectorizer:
hashing="HashingVectorizer",
)
- if self.strategy in strategies:
- estimator = self._get_est_class(
- name=strategies[self.strategy],
- module="feature_extraction.text",
- )
- self._estimator = estimator(**self.kwargs)
- else:
- raise ValueError(
- "Invalid value for the strategy parameter, got "
- f"{self.strategy}. Choose from: {', '.join(strategies)}."
- )
+ estimator = self._get_est_class(
+ name=strategies[self.strategy],
+ module="feature_extraction.text",
+ )
+ self._estimator = estimator(**self.kwargs)
self.log("Fitting Vectorizer...", 1)
self._estimator.fit(X[corpus])
diff --git a/atom/pipeline.py b/atom/pipeline.py
index 9c9fcd2e6..3f68174cb 100644
--- a/atom/pipeline.py
+++ b/atom/pipeline.py
@@ -22,7 +22,8 @@
from typeguard import typechecked
from atom.utils.types import (
- BOOL, DATAFRAME, ESTIMATOR, FEATURES, FLOAT, SEQUENCE, SERIES, TARGET, INT
+ BOOL, DATAFRAME, ESTIMATOR, FEATURES, FLOAT, INT, PANDAS, SEQUENCE, SERIES,
+ TARGET,
)
from atom.utils.utils import (
check_is_fitted, fit_one, fit_transform_one, transform_one,
@@ -261,7 +262,7 @@ def transform(
self,
X: FEATURES | None = None,
y: TARGET | None = None,
- ) -> DATAFRAME | SERIES | tuple[DATAFRAME, SERIES]:
+ ) -> DATAFRAME | SERIES | tuple[DATAFRAME, PANDAS]:
"""Transform the data.
Call `transform` on each transformer in the pipeline. The
@@ -304,7 +305,7 @@ def fit_transform(
X: FEATURES | None = None,
y: TARGET | None = None,
**fit_params,
- ) -> DATAFRAME | SERIES | tuple[DATAFRAME, SERIES]:
+ ) -> DATAFRAME | SERIES | tuple[DATAFRAME, PANDAS]:
"""Fit the pipeline and transform the data.
Parameters
@@ -314,13 +315,15 @@ def fit_transform(
X is ignored. None
if the estimator only uses y.
- y: int, str, dict, sequence or None, default=None
+ y: int, str, dict, sequence, dataframe or None, default=None
Target column corresponding to X.
- If None: y is ignored.
- If int: Position of the target column in X.
- If str: Name of the target column in X.
- - Else: Array with shape=(n_samples,) to use as target.
+ - If sequence: Target array with shape=(n_samples,) or
+ sequence of column names or positions for multioutput tasks.
+ - If dataframe: Target columns for multioutput tasks.
**fit_params
Additional keyword arguments for the fit method.
@@ -330,7 +333,7 @@ def fit_transform(
dataframe
Transformed feature set. Only returned if provided.
- series
+ series or dataframe
Transformed target column. Only returned if provided.
"""
@@ -352,7 +355,7 @@ def inverse_transform(
self,
X: FEATURES | None = None,
y: TARGET | None = None,
- ) -> DATAFRAME | SERIES | tuple[DATAFRAME, SERIES]:
+ ) -> DATAFRAME | SERIES | tuple[DATAFRAME, PANDAS]:
"""Inverse transform for each step in a reverse order.
All estimators in the pipeline must implement the
@@ -364,20 +367,22 @@ def inverse_transform(
Feature set with shape=(n_samples, n_features). If None,
X is ignored. None if the pipeline only uses y.
- y: int, str, dict, sequence or None, default=None
+ y: int, str, dict, sequence, dataframe or None, default=None
Target column corresponding to X.
- If None: y is ignored.
- If int: Position of the target column in X.
- If str: Name of the target column in X.
- - Else: Array with shape=(n_samples,) to use as target.
+ - If sequence: Target array with shape=(n_samples,) or
+ sequence of column names or positions for multioutput tasks.
+ - If dataframe: Target columns for multioutput tasks.
Returns
-------
dataframe
Transformed feature set. Only returned if provided.
- series
+ series or dataframe
Transformed target column. Only returned if provided.
"""
diff --git a/atom/plots.py b/atom/plots.py
deleted file mode 100644
index 5da24fdfb..000000000
--- a/atom/plots.py
+++ /dev/null
@@ -1,8289 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
-Automated Tool for Optimized Modelling (ATOM)
-Author: Mavs
-Description: Module containing the plotting classes.
-
-"""
-
-from __future__ import annotations
-
-from collections import defaultdict
-from contextlib import contextmanager
-from dataclasses import dataclass
-from datetime import datetime
-from functools import reduce
-from importlib.util import find_spec
-from itertools import chain, cycle
-
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import plotly.express as px
-import plotly.graph_objects as go
-import shap
-from joblib import Parallel, delayed
-from mlflow.tracking import MlflowClient
-from nltk.collocations import (
- BigramCollocationFinder, QuadgramCollocationFinder,
- TrigramCollocationFinder,
-)
-from optuna.importance import FanovaImportanceEvaluator
-from optuna.trial import TrialState
-from optuna.visualization._parallel_coordinate import (
- _get_dims_from_info, _get_parallel_coordinate_info,
-)
-from optuna.visualization._terminator_improvement import _get_improvement_info
-from optuna.visualization._utils import _is_log_scale
-from plotly.colors import unconvert_from_RGB_255, unlabel_rgb
-from scipy import stats
-from scipy.stats.mstats import mquantiles
-from sklearn.calibration import calibration_curve
-from sklearn.inspection import partial_dependence, permutation_importance
-from sklearn.metrics import (
- confusion_matrix, det_curve, precision_recall_curve, roc_curve,
-)
-from sklearn.utils import _safe_indexing
-from sklearn.utils._bunch import Bunch
-from sklearn.utils.metaestimators import available_if
-from sktime.forecasting.base import ForecastingHorizon
-from typeguard import typechecked
-
-from atom.utils.constants import PALETTE
-from atom.utils.types import (
- BOOL, DATAFRAME, FEATURES, FLOAT, INDEX, INT, INT_TYPES, METRIC_SELECTOR,
- MODEL, SCALAR, SEQUENCE, SERIES, SLICE,
-)
-from atom.utils.utils import (
- bk, check_canvas, check_dependency, check_hyperparams, check_predict_proba,
- composed, crash, divide, get_best_score, get_corpus, get_custom_scorer,
- has_attr, has_task, is_binary, is_multioutput, it, lst, plot_from_model,
- rnd, to_rgb,
-)
-
-
-@dataclass
-class Aesthetics:
- """Keeps track of plot aesthetics."""
-
- palette: SEQUENCE # Sequence of colors
- title_fontsize: INT # Fontsize for titles
- label_fontsize: INT # Fontsize for labels, legend and hoverinfo
- tick_fontsize: INT # Fontsize for ticks
- line_width: INT # Width of the line plots
- marker_size: INT # Size of the markers
-
-
-@typechecked
-class BaseFigure:
- """Base plotly figure.
-
- The instance stores the position of the current axes in grid,
- as well as the models used for the plot (to track in mlflow).
-
- Parameters
- ----------
- rows: int, default=1
- Number of subplot rows in the canvas.
-
- cols: int, default=1
- Number of subplot columns in the canvas.
-
- horizontal_spacing: float, default=0.05
- Space between subplot rows in normalized plot coordinates.
- The spacing is relative to the figure's size.
-
- vertical_spacing: float, default=0.07
- Space between subplot cols in normalized plot coordinates.
- The spacing is relative to the figure's size.
-
- palette: str or sequence, default="Prism"
- Name or color sequence for the palette.
-
- is_canvas: bool, default=False
- Whether the figure shows multiple plots.
-
- backend: str, default="plotly"
- Figure's backend. Choose between plotly or matplotlib.
-
- create_figure: bool, default=True
- Whether to create a new figure.
-
- """
-
- _marker = ["circle", "x", "diamond", "pentagon", "star", "hexagon"]
- _dash = [None, "dashdot", "dash", "dot", "longdash", "longdashdot"]
- _shape = ["", "/", "x", "\\", "-", "|", "+", "."]
-
- def __init__(
- self,
- rows: INT = 1,
- cols: INT = 1,
- horizontal_spacing: FLOAT = 0.05,
- vertical_spacing: FLOAT = 0.07,
- palette: str | SEQUENCE = "Prism",
- is_canvas: BOOL = False,
- backend: str = "plotly",
- create_figure: BOOL = True,
- ):
- self.rows = rows
- self.cols = cols
- self.horizontal_spacing = horizontal_spacing
- self.vertical_spacing = vertical_spacing
- if isinstance(palette, str):
- self._palette = getattr(px.colors.qualitative, palette)
- self.palette = cycle(self._palette)
- else:
- # Convert color names or hex to rgb
- self._palette = list(map(to_rgb, palette))
- self.palette = cycle(self._palette)
- self.is_canvas = is_canvas
- self.backend = backend
- self.create_figure = create_figure
-
- self.idx = 0 # N-th plot in the canvas
- self.axes = 0 # N-th axis in the canvas
- if self.create_figure:
- if self.backend == "plotly":
- self.figure = go.Figure()
- else:
- self.figure, _ = plt.subplots()
-
- self.groups = []
- self.style = dict(palette={}, marker={}, dash={}, shape={})
- self.marker = cycle(self._marker)
- self.dash = cycle(self._dash)
- self.shape = cycle(self._shape)
-
- self.pos = {} # Subplot position to use for title
- self.custom_layout = {} # Layout params specified by user
- self.used_models = [] # Models plotted in this figure
-
- # Perform parameter checks
- if not 0 < horizontal_spacing < 1:
- raise ValueError(
- "Invalid value for the horizontal_spacing parameter. The "
- f"value must lie between 0 and 1, got {horizontal_spacing}."
- )
-
- if not 0 < vertical_spacing < 1:
- raise ValueError(
- "Invalid value for the vertical_spacing parameter. The "
- f"value must lie between 0 and 1, got {vertical_spacing}."
- )
-
- @property
- def grid(self) -> tuple[INT, INT]:
- """Position of the current axes on the grid.
-
- Returns
- -------
- int
- X-position.
-
- int
- Y-position.
-
- """
- return (self.idx - 1) // self.cols + 1, self.idx % self.cols or self.cols
-
- @property
- def next_subplot(self) -> go.Figure | plt.Figure | None:
- """Increase the subplot index.
-
- Returns
- -------
- go.Figure, plt.Figure or None
- Current figure. Returns None if `create_figure=False`.
-
- """
- # Check if there are too many plots in the canvas
- if self.idx >= self.rows * self.cols:
- raise ValueError(
- "Invalid number of plots in the canvas! Increase "
- "the number of rows and cols to add more plots."
- )
- else:
- self.idx += 1
-
- if self.create_figure:
- return self.figure
-
- def get_elem(self, name: SCALAR | str | None = None, element: str = "palette") -> str:
- """Get the plot element for a specific name.
-
- This method is used to assign the same element (color, marker,
- etc...) to the same columns and models in a plot.
-
- Parameters
- ----------
- name: int, float or str or None
- Name for which to get the plot element. The name is stored in
- the element attributes to assign the same element to all calls
- with the same name.
-
- element: str, default="palette"
- Plot element to get. Choose from: palette, marker, dash, shape.
-
- Returns
- -------
- str
- Element code.
-
- """
- if name is None:
- return getattr(self, f"_{element}")[0] # Get first element (default)
- elif name in self.style[element]:
- return self.style[element][name]
- else:
- return self.style[element].setdefault(name, next(getattr(self, element)))
-
- def showlegend(self, name: str, legend: str | dict | None) -> BOOL:
- """Get whether the trace should be showed in the legend.
-
- If there's already a trace with the same name, it's not
- necessary to show it in the plot's legend.
-
- Parameters
- ----------
- name: str
- Name of the trace.
-
- legend: str, dict or None
- Legend parameter.
-
- Returns
- -------
- bool
- Whether the trace should be placed in the legend.
-
- """
- if name in self.groups:
- return False
- else:
- self.groups.append(name)
- return legend is not None
-
- def get_axes(
- self,
- x: tuple[INT, INT] = (0, 1),
- y: tuple[INT, INT] = (0, 1),
- coloraxis: dict | None = None,
- ) -> tuple[str, str]:
- """Create and update the plot's axes.
-
- Parameters
- ----------
- x: tuple of int
- Relative x-size of the plot.
-
- y: tuple of int
- Relative y-size of the plot.
-
- coloraxis: dict or None
- Properties of the coloraxis to create. None to ignore.
-
- Returns
- -------
- str
- Name of the x-axis.
-
- str
- Name of the y-axis.
-
- """
- self.axes += 1
-
- # Calculate the distance between subplots
- x_offset = divide(self.horizontal_spacing, (self.cols - 1))
- y_offset = divide(self.vertical_spacing, (self.rows - 1))
-
- # Calculate the size of the subplot
- x_size = (1 - ((x_offset * 2) * (self.cols - 1))) / self.cols
- y_size = (1 - ((y_offset * 2) * (self.rows - 1))) / self.rows
-
- # Calculate the size of the axes
- ax_size = (x[1] - x[0]) * x_size
- ay_size = (y[1] - y[0]) * y_size
-
- # Determine the position for the axes
- x_pos = (self.grid[1] - 1) * (x_size + 2 * x_offset) + x[0] * x_size
- y_pos = (self.rows - self.grid[0]) * (y_size + 2 * y_offset) + y[0] * y_size
-
- # Store positions for subplot title
- self.pos[str(self.axes)] = (x_pos + ax_size / 2, rnd(y_pos + ay_size))
-
- # Update the figure with the new axes
- self.figure.update_layout(
- {
- f"xaxis{self.axes}": dict(
- domain=(x_pos, rnd(x_pos + ax_size)), anchor=f"y{self.axes}"
- ),
- f"yaxis{self.axes}": dict(
- domain=(y_pos, rnd(y_pos + ay_size)), anchor=f"x{self.axes}"
- ),
- }
- )
-
- # Place a colorbar right of the axes
- if coloraxis:
- if title := coloraxis.pop("title", None):
- coloraxis["colorbar_title"] = dict(
- text=title, side="right", font_size=coloraxis.pop("font_size")
- )
-
- coloraxis["colorbar_x"] = rnd(x_pos + ax_size) + ax_size / 40
- coloraxis["colorbar_xanchor"] = "left"
- coloraxis["colorbar_y"] = y_pos + ay_size / 2
- coloraxis["colorbar_yanchor"] = "middle"
- coloraxis["colorbar_len"] = ay_size * 0.9
- coloraxis["colorbar_thickness"] = ax_size * 30 # Default width in pixels
- self.figure.update_layout(
- {f"coloraxis{coloraxis.pop('axes', self.axes)}": coloraxis}
- )
-
- xaxis = f"x{self.axes if self.axes > 1 else ''}"
- yaxis = f"y{self.axes if self.axes > 1 else ''}"
- return xaxis, yaxis
-
-
-@typechecked
-class BasePlot:
- """Base class for all plotting methods.
-
- This base class defines the properties that can be changed
- to customize the plot's aesthetics.
-
- """
-
- _fig = None
- _custom_layout = {}
- _custom_traces = {}
- _aesthetics = Aesthetics(
- palette=list(PALETTE),
- title_fontsize=24,
- label_fontsize=16,
- tick_fontsize=12,
- line_width=2,
- marker_size=8,
- )
-
- # Properties =================================================== >>
-
- @property
- def aesthetics(self) -> dict:
- """All plot aesthetic attributes."""
- return self._aesthetics
-
- @aesthetics.setter
- def aesthetics(self, value: dict):
- self.palette = value.get("palette", self.palette)
- self.title_fontsize = value.get("title_fontsize", self.title_fontsize)
- self.label_fontsize = value.get("label_fontsize", self.label_fontsize)
- self.tick_fontsize = value.get("tick_fontsize", self.tick_fontsize)
- self.line_width = value.get("line_width", self.line_width)
- self.marker_size = value.get("marker_size", self.marker_size)
-
- @property
- def palette(self) -> str | SEQUENCE:
- """Color palette.
-
- Specify one of plotly's [built-in palettes][palette] or create
- a custom one, e.g. `atom.palette = ["red", "green", "blue"]`.
-
- """
- return self._aesthetics.palette
-
- @palette.setter
- def palette(self, value: str | SEQUENCE):
- if isinstance(value, str) and not hasattr(px.colors.qualitative, value):
- raise ValueError(
- f"Invalid value for the palette parameter, got {value}. Choose "
- f"from one of plotly's built-in qualitative color sequences in "
- f"the px.colors.qualitative module or define your own sequence."
- )
-
- self._aesthetics.palette = value
-
- @property
- def title_fontsize(self) -> INT:
- """Fontsize for the plot's title."""
- return self._aesthetics.title_fontsize
-
- @title_fontsize.setter
- def title_fontsize(self, value: INT):
- if value <= 0:
- raise ValueError(
- "Invalid value for the title_fontsize parameter. "
- f"Value should be >=0, got {value}."
- )
-
- self._aesthetics.title_fontsize = value
-
- @property
- def label_fontsize(self) -> INT:
- """Fontsize for the labels, legend and hover information."""
- return self._aesthetics.label_fontsize
-
- @label_fontsize.setter
- def label_fontsize(self, value: INT):
- if value <= 0:
- raise ValueError(
- "Invalid value for the label_fontsize parameter. "
- f"Value should be >=0, got {value}."
- )
-
- self._aesthetics.label_fontsize = value
-
- @property
- def tick_fontsize(self) -> INT:
- """Fontsize for the ticks along the plot's axes."""
- return self._aesthetics.tick_fontsize
-
- @tick_fontsize.setter
- def tick_fontsize(self, value: INT):
- if value <= 0:
- raise ValueError(
- "Invalid value for the tick_fontsize parameter. "
- f"Value should be >=0, got {value}."
- )
-
- self._aesthetics.tick_fontsize = value
-
- @property
- def line_width(self) -> INT:
- """Width of the line plots."""
- return self._aesthetics.line_width
-
- @line_width.setter
- def line_width(self, value: INT):
- if value <= 0:
- raise ValueError(
- "Invalid value for the line_width parameter. "
- f"Value should be >=0, got {value}."
- )
-
- self._aesthetics.line_width = value
-
- @property
- def marker_size(self) -> INT:
- """Size of the markers."""
- return self._aesthetics.marker_size
-
- @marker_size.setter
- def marker_size(self, value: INT):
- if value <= 0:
- raise ValueError(
- "Invalid value for the marker_size parameter. "
- f"Value should be >=0, got {value}."
- )
-
- self._aesthetics.marker_size = value
-
- # Methods ====================================================== >>
-
- @staticmethod
- def _get_plot_index(df: DATAFRAME) -> INDEX:
- """Return the dataset's index in a plottable format.
-
- Plotly does not accept all index formats (e.g. pd.Period),
- thus use this utility method to convert to timestamp those
- indices that can, else return as is.
-
- Parameters
- ----------
- df: dataframe
- Data set to get the index from.
-
- Returns
- -------
- index
- Index in an acceptable format.
-
- """
- if hasattr(df.index, "to_timestamp"):
- return df.index.to_timestamp()
- else:
- return df.index
-
- @staticmethod
- def _get_show(show: INT | None, model: MODEL | list[MODEL]) -> INT:
- """Check and return the number of features to show.
-
- Parameters
- ----------
- show: int or None
- Number of features to show. If None, select all (max 200).
-
- model: Model or list
- Models from which to get the features.
-
- Returns
- -------
- int
- Number of features to show.
-
- """
- max_fxs = max(m.n_features for m in lst(model))
- if show is None or show > max_fxs:
- # Limit max features shown to avoid maximum figsize error
- show = min(200, max_fxs)
- elif show < 1:
- raise ValueError(
- f"Invalid value for the show parameter. Value should be >0, got {show}."
- )
-
- return show
-
- @staticmethod
- def _get_hyperparams(
- params: str | slice | SEQUENCE | None,
- model: MODEL,
- ) -> list[str]:
- """Check and return a model's hyperparameters.
-
- Parameters
- ----------
- params: str, slice, sequence or None
- Hyperparameters to get. Use a sequence or add `+` between
- options to select more than one. If None, all the model's
- hyperparameters are selected.
-
- model: Model
- Get the params from this model.
-
- Returns
- -------
- list of str
- Selected hyperparameters.
-
- """
- if params is None:
- hyperparameters = list(model._ht["distributions"])
- elif isinstance(params, slice):
- hyperparameters = list(model._ht["distributions"])[params]
- else:
- hyperparameters = []
- for param in lst(params):
- if isinstance(param, INT_TYPES):
- hyperparameters.append(list(model._ht["distributions"])[param])
- elif isinstance(param, str):
- for p in param.split("+"):
- if p not in model._ht["distributions"]:
- raise ValueError(
- "Invalid value for the params parameter. "
- f"Hyperparameter {p} was not used during the "
- f"optimization of model {model.name}."
- )
- else:
- hyperparameters.append(p)
-
- if not hyperparameters:
- raise ValueError(f"Didn't find any hyperparameters for model {model.name}.")
-
- return hyperparameters
-
- def _get_metric(
- self,
- metric: INT | str | SEQUENCE,
- max_one: BOOL,
- ) -> INT | str | list[INT]:
- """Check and return the provided metric index.
-
- Parameters
- ----------
- metric: int, str, sequence or None
- Metric to retrieve. If None, all metrics are returned.
-
- max_one: bool
- Whether one or multiple metrics are allowed.
-
- Returns
- -------
- int or list
- Position index of the metric. If `max_one=False`, returns
- a list of metric positions.
-
- """
- if metric is None:
- return list(range(len(self._metric)))
- else:
- inc = []
- for met in lst(metric):
- if isinstance(met, INT_TYPES):
- if 0 <= met < len(self._metric):
- inc.append(met)
- else:
- raise ValueError(
- f"Invalid value for the metric parameter. Value {met} is out "
- f"of range for a pipeline with {len(self._metric)} metrics."
- )
- elif isinstance(met, str):
- met = met.lower()
- for m in met.split("+"):
- if m in ("time_ht", "time_fit", "time_bootstrap", "time"):
- inc.append(m)
- elif (name := get_custom_scorer(m).name) in self.metric:
- inc.append(self._metric.index(name))
- else:
- raise ValueError(
- "Invalid value for the metric parameter. The "
- f"{name} metric wasn't used to fit the models."
- )
-
- if len(inc) > 1 and max_one:
- raise ValueError(
- "Invalid value for the metric parameter. "
- f"Only one metric is allowed, got {inc}."
- )
-
- return inc[0] if max_one else inc
-
- def _get_set(
- self,
- dataset: str | SEQUENCE,
- max_one: BOOL,
- allow_holdout: BOOL = True,
- ) -> str | list[str]:
- """Check and return the provided data set.
-
- Parameters
- ----------
- dataset: str or sequence
- Name(s) of the data set to retrieve.
-
- max_one: bool
- Whether one or multiple data sets are allowed. If True, return
- the data set instead of a list.
-
- allow_holdout: bool, default=True
- Whether to allow the retrieval of the holdout set.
-
- Returns
- -------
- str or list
- Selected data set(s).
-
- """
- for ds in (dataset := "+".join(lst(dataset)).lower().split("+")):
- if ds == "holdout":
- if allow_holdout:
- if self.holdout is None:
- raise ValueError(
- "Invalid value for the dataset parameter. No holdout "
- "data set was specified when initializing the instance."
- )
- else:
- raise ValueError(
- "Invalid value for the dataset parameter, got "
- f"{ds}. Choose from: train, test."
- )
- elif ds not in ("train", "test"):
- raise ValueError(
- "Invalid value for the dataset parameter, got {ds}. "
- f"Choose from: train, test{', holdout' if allow_holdout else ''}."
- )
-
- if max_one and len(dataset) > 1:
- raise ValueError(
- "Invalid value for the dataset parameter, got "
- f"{dataset}. Only one data set is allowed."
- )
-
- return dataset[0] if max_one else dataset
-
- def _get_figure(self, **kwargs) -> go.Figure | plt.Figure:
- """Return existing figure if in canvas, else a new figure.
-
- Every time this method is called from a canvas, the plot
- index is raised by one to keep track in which subplot the
- BaseFigure is at.
-
- Parameters
- ----------
- **kwargs
- Additional keyword arguments for BaseFigure.
-
- Returns
- -------
- [go.Figure][] or [plt.Figure][]
- Existing figure or newly created.
-
- """
- if BasePlot._fig and BasePlot._fig.is_canvas:
- return BasePlot._fig.next_subplot
- else:
- BasePlot._fig = BaseFigure(palette=self.palette, **kwargs)
- return BasePlot._fig.next_subplot
-
- def _draw_line(
- self,
- parent: str,
- child: str | None = None,
- legend: str | dict = None,
- **kwargs,
- ) -> go.Scatter:
- """Draw a line.
-
- Unify the style to draw a line, where parent and child
- (e.g. model - data set or column - distribution) keep the
- same style (color or dash). A legendgroup title is only added
- when there is a child element.
-
- Parameters
- ----------
- parent: str
- Name of the model.
-
- child: str or None, default=None
- Data set which is plotted.
-
- legend: str, dict or None
- Legend argument provided by the user.
-
- **kwargs
- Additional keyword arguments for the trace.
-
- Returns
- -------
- go.Scatter
- New trace to add to figure.
-
- """
- legendgrouptitle = dict(text=parent, font_size=self.label_fontsize)
- hover = f"(%{{x}}, %{{y}}){parent}{f' - {child}' if child else ''}"
- return go.Scatter(
- line=dict(
- width=self.line_width,
- color=BasePlot._fig.get_elem(parent),
- dash=BasePlot._fig.get_elem(child, "dash"),
- ),
- marker=dict(
- symbol=BasePlot._fig.get_elem(child, "marker"),
- size=self.marker_size,
- color=BasePlot._fig.get_elem(parent),
- line=dict(width=1, color="rgba(255, 255, 255, 0.9)"),
- ),
- hovertemplate=kwargs.pop("hovertemplate", hover),
- name=kwargs.pop("name", child or parent),
- legendgroup=kwargs.pop("legendgroup", parent),
- legendgrouptitle=legendgrouptitle if child else None,
- showlegend=BasePlot._fig.showlegend(f"{parent}-{child}", legend),
- **kwargs,
- )
-
- @staticmethod
- def _draw_straight_line(y: SCALAR | str, xaxis: str, yaxis: str):
- """Draw a line across the axis.
-
- The line can be either horizontal or diagonal. The line should
- be used as reference. It's not added to the legend and doesn't
- show any information on hover.
-
- Parameters
- ----------
- y: int, float or str, default="diagonal"
- Coordinates on the y-axis. If a value, draw a horizontal line
- at that value. If "diagonal", draw a diagonal line from x.
-
- xaxis: str
- Name of the x-axis to draw in.
-
- yaxis: str
- Name of the y-axis to draw in.
-
- """
- BasePlot._fig.figure.add_shape(
- type="line",
- x0=0,
- x1=1,
- y0=0 if y == "diagonal" else y,
- y1=1 if y == "diagonal" else y,
- xref=f"{xaxis} domain",
- yref=f"{yaxis} domain" if y == "diagonal" else yaxis,
- line=dict(width=1, color="black", dash="dash"),
- opacity=0.6,
- layer="below",
- )
-
- def _plot(
- self,
- fig: go.Figure | plt.Figure | None = None,
- ax: plt.Axes | tuple[str, str] | None = None,
- **kwargs,
- ) -> go.Figure | plt.Figure | None:
- """Make the plot.
-
- Customize the axes to the default layout and plot the figure
- if it's not part of a canvas.
-
- Parameters
- ----------
- fig: go.Figure, plt.Figure or None
- Current figure. If None, use `plt.gcf()`.
-
- ax: plt.Axes, tuple or None, default=None
- Axis object or names of the axes to update. If None, ignore
- their update.
-
- **kwargs
- Keyword arguments containing the figure's parameters.
-
- - title: Name of the title or custom configuration.
- - legend: Whether to show the legend or custom configuration.
- - xlabel: Label for the x-axis.
- - ylabel: Label for the y-axis.
- - xlim: Limits for the x-axis.
- - ylim: Limits for the y-axis.
- - figsize: Size of the figure.
- - filename: Name of the saved file.
- - plotname: Name of the plot.
- - display: Whether to show the plot. If None, return the figure.
-
- Returns
- -------
- plt.Figure, go.Figure or None
- Created figure. Only returned if `display=None`.
-
- """
- # Set name with which to save the file
- if kwargs.get("filename"):
- if kwargs["filename"].endswith("auto"):
- name = kwargs["filename"].replace("auto", kwargs["plotname"])
- else:
- name = kwargs["filename"]
- else:
- name = kwargs.get("plotname")
-
- fig = fig or BasePlot._fig.figure
- if BasePlot._fig.backend == "plotly":
- if ax:
- fig.update_layout(
- {
- f"{ax[0]}_title": dict(
- text=kwargs.get("xlabel"), font_size=self.label_fontsize
- ),
- f"{ax[1]}_title": dict(
- text=kwargs.get("ylabel"), font_size=self.label_fontsize
- ),
- f"{ax[0]}_range": kwargs.get("xlim"),
- f"{ax[1]}_range": kwargs.get("ylim"),
- f"{ax[0]}_automargin": True,
- f"{ax[1]}_automargin": True,
- }
- )
-
- if BasePlot._fig.is_canvas and (title := kwargs.get("title")):
- # Add a subtitle to a plot in the canvas
- default_title = {
- "x": BasePlot._fig.pos[ax[0][5:] or "1"][0],
- "y": BasePlot._fig.pos[ax[0][5:] or "1"][1] + 0.005,
- "xref": "paper",
- "yref": "paper",
- "xanchor": "center",
- "yanchor": "bottom",
- "showarrow": False,
- "font_size": self.title_fontsize - 4,
- }
-
- if isinstance(title, dict):
- title = {**default_title, **title}
- else:
- title = {"text": title, **default_title}
-
- fig.update_layout(dict(annotations=fig.layout.annotations + (title,)))
-
- if not BasePlot._fig.is_canvas and kwargs.get("plotname"):
- default_title = dict(
- x=0.5,
- y=1,
- pad=dict(t=15, b=15),
- xanchor="center",
- yanchor="top",
- xref="paper",
- font_size=self.title_fontsize,
- )
- if isinstance(title := kwargs.get("title"), dict):
- title = {**default_title, **title}
- else:
- title = {"text": title, **default_title}
-
- default_legend = dict(
- traceorder="grouped",
- groupclick=kwargs.get("groupclick", "toggleitem"),
- font_size=self.label_fontsize,
- bgcolor="rgba(255, 255, 255, 0.5)",
- )
- if isinstance(legend := kwargs.get("legend"), str):
- position = {}
- legend = legend.lower()
- if legend == "upper left":
- position = dict(x=0.01, y=0.99, xanchor="left", yanchor="top")
- elif legend == "lower left":
- position = dict(x=0.01, y=0.01, xanchor="left", yanchor="bottom")
- elif legend == "upper right":
- position = dict(x=0.99, y=0.99, xanchor="right", yanchor="top")
- elif legend == "lower right":
- position = dict(x=0.99, y=0.01, xanchor="right", yanchor="bottom")
- elif legend == "upper center":
- position = dict(x=0.5, y=0.99, xanchor="center", yanchor="top")
- elif legend == "lower center":
- position = dict(x=0.5, y=0.01, xanchor="center", yanchor="bottom")
- elif legend == "center left":
- position = dict(x=0.01, y=0.5, xanchor="left", yanchor="middle")
- elif legend == "center right":
- position = dict(x=0.99, y=0.5, xanchor="right", yanchor="middle")
- elif legend == "center":
- position = dict(x=0.5, y=0.5, xanchor="center", yanchor="middle")
- elif legend != "out":
- raise ValueError(
- "Invalid value for the legend parameter. Got unknown "
- f"position: {legend}. Choose from: upper left, upper "
- "right, lower left, lower right, upper center, lower "
- "center, center left, center right, center, out."
- )
- legend = {**default_legend, **position}
- elif isinstance(legend, dict):
- legend = {**default_legend, **legend}
-
- # Update layout with predefined settings
- space1 = self.title_fontsize if title.get("text") else 10
- space2 = self.title_fontsize * int(bool(fig.layout.annotations))
- fig.update_layout(
- title=title,
- legend=legend,
- showlegend=bool(kwargs.get("legend")),
- hoverlabel=dict(font_size=self.label_fontsize),
- font_size=self.tick_fontsize,
- margin=dict(l=50, b=50, r=0, t=25 + space1 + space2, pad=0),
- width=kwargs["figsize"][0],
- height=kwargs["figsize"][1],
- )
-
- # Update plot with custom settings
- fig.update_traces(**self._custom_traces)
- fig.update_layout(**self._custom_layout)
-
- if kwargs.get("filename"):
- if "." not in name or name.endswith(".html"):
- fig.write_html(name if "." in name else name + ".html")
- else:
- fig.write_image(name)
-
- # Log plot to mlflow run of every model visualized
- if getattr(self, "experiment", None) and self.log_plots:
- for m in set(BasePlot._fig.used_models):
- MlflowClient().log_figure(
- run_id=m._run.info.run_id,
- figure=fig,
- artifact_file=name if "." in name else f"{name}.html",
- )
-
- if kwargs.get("display") is True:
- fig.show()
- elif kwargs.get("display") is None:
- return fig
-
- else:
- if kwargs.get("title"):
- ax.set_title(kwargs.get("title"), fontsize=self.title_fontsize, pad=20)
- if kwargs.get("xlabel"):
- ax.set_xlabel(kwargs["xlabel"], fontsize=self.label_fontsize, labelpad=12)
- if kwargs.get("ylabel"):
- ax.set_ylabel(kwargs["ylabel"], fontsize=self.label_fontsize, labelpad=12)
- if ax is not None:
- ax.tick_params(axis="both", labelsize=self.tick_fontsize)
-
- if kwargs.get("figsize"):
- # Convert from pixels to inches
- fig.set_size_inches(
- kwargs["figsize"][0] // fig.get_dpi(),
- kwargs["figsize"][1] // fig.get_dpi(),
- )
- plt.tight_layout()
- if kwargs.get("filename"):
- fig.savefig(name)
-
- # Log plot to mlflow run of every model visualized
- if self.experiment and self.log_plots:
- for m in set(BasePlot._fig.used_models):
- MlflowClient().log_figure(
- run_id=m._run.info.run_id,
- figure=fig,
- artifact_file=name if "." in name else f"{name}.png",
- )
-
- plt.show() if kwargs.get("display") else plt.close()
- if kwargs.get("display") is None:
- return fig
-
- @composed(contextmanager, crash)
- def canvas(
- self,
- rows: INT = 1,
- cols: INT = 2,
- *,
- horizontal_spacing: FLOAT = 0.05,
- vertical_spacing: FLOAT = 0.07,
- title: str | dict | None = None,
- legend: str | dict | None = "out",
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: BOOL = True,
- ):
- """Create a figure with multiple plots.
-
- This `@contextmanager` allows you to draw many plots in one
- figure. The default option is to add two plots side by side.
- See the [user guide][canvas] for an example.
-
- Parameters
- ----------
- rows: int, default=1
- Number of plots in length.
-
- cols: int, default=2
- Number of plots in width.
-
- horizontal_spacing: float, default=0.05
- Space between subplot rows in normalized plot coordinates.
- The spacing is relative to the figure's size.
-
- vertical_spacing: float, default=0.07
- Space between subplot cols in normalized plot coordinates.
- The spacing is relative to the figure's size.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: bool, str or dict, default="out"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of plots in the canvas.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool, default=True
- Whether to render the plot.
-
- Yields
- ------
- [go.Figure][]
- Plot object.
-
- """
- BasePlot._fig = BaseFigure(
- rows=rows,
- cols=cols,
- horizontal_spacing=horizontal_spacing,
- vertical_spacing=vertical_spacing,
- palette=self.palette,
- is_canvas=True,
- )
-
- try:
- yield BasePlot._fig.figure
- finally:
- BasePlot._fig.is_canvas = False # Close the canvas
- self._plot(
- groupclick="togglegroup",
- title=title,
- legend=legend,
- figsize=figsize or (550 + 350 * cols, 200 + 400 * rows),
- plotname="canvas",
- filename=filename,
- display=display,
- )
-
- def reset_aesthetics(self):
- """Reset the plot [aesthetics][] to their default values."""
- self._custom_layout = {}
- self._custom_traces = {}
- self._aesthetics = Aesthetics(
- palette=PALETTE,
- title_fontsize=24,
- label_fontsize=16,
- tick_fontsize=12,
- line_width=2,
- marker_size=8,
- )
-
- def update_layout(self, **kwargs):
- """Update the properties of the plot's layout.
-
- Recursively update the structure of the original layout with
- the values in the arguments.
-
- Parameters
- ----------
- **kwargs
- Keyword arguments for the figure's [update_layout][] method.
-
- """
- self._custom_layout = kwargs
-
- def update_traces(self, **kwargs):
- """Update the properties of the plot's traces.
-
- Recursively update the structure of the original traces with
- the values in the arguments.
-
- Parameters
- ----------
- **kwargs
- Keyword arguments for the figure's [update_traces][] method.
-
- """
- self._custom_traces = kwargs
-
-
-@typechecked
-class FeatureSelectorPlot(BasePlot):
- """Feature selection plots.
-
- These plots are accessible from atom or from the FeatureSelector
- class when the appropriate feature selection strategy is used.
-
- """
-
- @available_if(has_attr("pca"))
- @crash
- def plot_components(
- self,
- show: INT | None = None,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "lower right",
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the explained variance ratio per component.
-
- Kept components are colored and discarded components are
- transparent. This plot is available only when feature selection
- was applied with strategy="pca".
-
- Parameters
- ----------
- show: int or None, default=None
- Number of components to show. None to show all.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="lower right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of components shown.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:FeatureSelectorPlot.plot_pca
- atom.plots:FeatureSelectorPlot.plot_rfecv
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.feature_selection("pca", n_features=5)
- atom.plot_components(show=10)
- ```
-
- """
- if show is None or show > self.pca.components_.shape[0]:
- # Limit max features shown to avoid maximum figsize error
- show = min(200, self.pca.components_.shape[0])
- elif show < 1:
- raise ValueError(
- "Invalid value for the show parameter. "
- f"Value should be >0, got {show}."
- )
-
- # Get the variance ratio per component
- variance = np.array(self.pca.explained_variance_ratio_)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
-
- # Create color scheme: first normal and then fully transparent
- color = BasePlot._fig.get_elem("components")
- opacity = [0.2] * self.pca._comps + [0] * (len(variance) - self.pca._comps)
-
- fig.add_trace(
- go.Bar(
- x=variance,
- y=[f"pca{str(i)}" for i in range(len(variance))],
- orientation="h",
- marker=dict(
- color=[f"rgba({color[4:-1]}, {o})" for o in opacity],
- line=dict(width=2, color=color),
- ),
- hovertemplate="%{x}",
- name=f"Variance retained: {variance[:self.pca._comps].sum():.3f}",
- legendgroup="components",
- showlegend=BasePlot._fig.showlegend("components", legend),
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- fig.update_layout({f"yaxis{yaxis[1:]}": dict(categoryorder="total ascending")})
-
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Explained variance ratio",
- ylim=(len(variance) - show - 0.5, len(variance) - 0.5),
- title=title,
- legend=legend,
- figsize=figsize or (900, 400 + show * 50),
- plotname="plot_components",
- filename=filename,
- display=display,
- )
-
- @available_if(has_attr("pca"))
- @crash
- def plot_pca(
- self,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the explained variance ratio vs number of components.
-
- If the underlying estimator is [PCA][] (for dense datasets),
- all possible components are plotted. If the underlying estimator
- is [TruncatedSVD][] (for sparse datasets), it only shows the
- selected components. The star marks the number of components
- selected by the user. This plot is available only when feature
- selection was applied with strategy="pca".
-
- Parameters
- ----------
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:FeatureSelectorPlot.plot_components
- atom.plots:FeatureSelectorPlot.plot_rfecv
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.feature_selection("pca", n_features=5)
- atom.plot_pca()
- ```
-
- """
- # Create star symbol at selected number of components
- symbols = ["circle"] * self.pca.n_features_in_
- symbols[self.pca._comps - 1] = "star"
- sizes = [self.marker_size] * self.pca.n_features_in_
- sizes[self.pca._comps - 1] = self.marker_size * 1.5
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
- fig.add_trace(
- go.Scatter(
- x=tuple(range(1, self.pca.n_features_in_ + 1)),
- y=np.cumsum(self.pca.explained_variance_ratio_),
- mode="lines+markers",
- line=dict(width=self.line_width, color=BasePlot._fig.get_elem("pca")),
- marker=dict(
- symbol=symbols,
- size=sizes,
- line=dict(width=1, color="rgba(255, 255, 255, 0.9)"),
- opacity=1,
- ),
- hovertemplate="%{y}",
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- fig.update_layout(
- {
- "hovermode": "x",
- f"xaxis{xaxis[1:]}_showspikes": True,
- f"yaxis{yaxis[1:]}_showspikes": True,
- }
- )
-
- margin = self.pca.n_features_in_ / 30
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="First N principal components",
- ylabel="Cumulative variance ratio",
- xlim=(1 - margin, self.pca.n_features_in_ - 1 + margin),
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_pca",
- filename=filename,
- display=display,
- )
-
- @available_if(has_attr("rfecv"))
- @crash
- def plot_rfecv(
- self,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the rfecv results.
-
- Plot the scores obtained by the estimator fitted on every
- subset of the dataset. Only available when feature selection
- was applied with strategy="rfecv".
-
- Parameters
- ----------
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:FeatureSelectorPlot.plot_components
- atom.plots:FeatureSelectorPlot.plot_pca
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.feature_selection("rfecv", solver="Tree")
- atom.plot_rfecv()
- ```
-
- """
- try: # Define the y-label for the plot
- ylabel = self.rfecv.get_params()["scoring"].name
- except AttributeError:
- ylabel = "accuracy" if self.goal.startswith("class") else "r2"
-
- x = range(self.rfecv.min_features_to_select, self.rfecv.n_features_in_ + 1)
-
- # Create star symbol at selected number of features
- sizes = [6] * len(x)
- sizes[self.rfecv.n_features_ - self.rfecv.min_features_to_select] = 12
- symbols = ["circle"] * len(x)
- symbols[self.rfecv.n_features_ - self.rfecv.min_features_to_select] = "star"
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
-
- mean = self.rfecv.cv_results_["mean_test_score"]
- std = self.rfecv.cv_results_["std_test_score"]
-
- fig.add_trace(
- go.Scatter(
- x=list(x),
- y=mean,
- mode="lines+markers",
- line=dict(width=self.line_width, color=BasePlot._fig.get_elem("rfecv")),
- marker=dict(
- symbol=symbols,
- size=sizes,
- line=dict(width=1, color="rgba(255, 255, 255, 0.9)"),
- opacity=1,
- ),
- name=ylabel,
- legendgroup="rfecv",
- showlegend=BasePlot._fig.showlegend("rfecv", legend),
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- # Add error bands
- fig.add_traces(
- [
- go.Scatter(
- x=tuple(x),
- y=mean + std,
- mode="lines",
- line=dict(width=1, color=BasePlot._fig.get_elem("rfecv")),
- hovertemplate="%{y}upper bound",
- legendgroup="rfecv",
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- ),
- go.Scatter(
- x=tuple(x),
- y=mean - std,
- mode="lines",
- line=dict(width=1, color=BasePlot._fig.get_elem("rfecv")),
- fill="tonexty",
- fillcolor=f"rgba{BasePlot._fig.get_elem('rfecv')[3:-1]}, 0.2)",
- hovertemplate="%{y}lower bound",
- legendgroup="rfecv",
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- ),
- ]
- )
-
- fig.update_layout({"hovermode": "x unified"})
-
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- groupclick="togglegroup",
- xlabel="Number of features",
- ylabel=ylabel,
- xlim=(min(x) - len(x) / 30, max(x) + len(x) / 30),
- ylim=(min(mean) - 3 * max(std), max(mean) + 3 * max(std)),
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_rfecv",
- filename=filename,
- display=display,
- )
-
-
-@typechecked
-class DataPlot(BasePlot):
- """Data plots.
-
- Plots used for understanding and interpretation of the dataset.
- They are only accessible from atom, since the other runners should
- be used for model training only, not for data manipulation.
-
- """
-
- @crash
- def plot_correlation(
- self,
- columns: slice | SEQUENCE | None = None,
- method: str = "pearson",
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] = (800, 700),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot a correlation matrix.
-
- Displays a heatmap showing the correlation between columns in
- the dataset. The colors red, blue and white stand for positive,
- negative, and no correlation respectively.
-
- Parameters
- ----------
- columns: slice, sequence or None, default=None
- Columns to plot. If None, plot all columns in the dataset.
- Selected categorical columns are ignored.
-
- method: str, default="pearson"
- Method of correlation. Choose from: pearson, kendall or
- spearman.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple, default=(800, 700)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:DataPlot.plot_distribution
- atom.plots:DataPlot.plot_qq
- atom.plots:DataPlot.plot_relationships
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.plot_correlation()
- ```
-
- """
- columns = self.branch._get_columns(columns, only_numerical=True)
- if method.lower() not in ("pearson", "kendall", "spearman"):
- raise ValueError(
- f"Invalid value for the method parameter, got {method}. "
- "Choose from: pearson, kendall or spearman."
- )
-
- # Compute the correlation matrix
- corr = self.dataset[columns].corr(method=method.lower())
-
- # Generate a mask for the lower triangle
- # k=1 means keep outermost diagonal line
- mask = np.zeros_like(corr, dtype=bool)
- mask[np.triu_indices_from(mask, k=1)] = True
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes(
- x=(0, 0.87),
- coloraxis=dict(
- colorscale="rdbu_r",
- cmin=-1,
- cmax=1,
- title=f"{method.lower()} correlation",
- font_size=self.label_fontsize,
- ),
- )
-
- fig.add_trace(
- go.Heatmap(
- z=corr.mask(mask),
- x=columns,
- y=columns,
- coloraxis=f"coloraxis{xaxis[1:]}",
- hovertemplate="x:%{x}
y:%{y}
z:%{z}",
- hoverongaps=False,
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- fig.update_layout(
- {
- "template": "plotly_white",
- f"yaxis{yaxis[1:]}_autorange": "reversed",
- f"xaxis{xaxis[1:]}_showgrid": False,
- f"yaxis{yaxis[1:]}_showgrid": False,
- }
- )
-
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_correlation",
- filename=filename,
- display=display,
- )
-
- @crash
- def plot_distribution(
- self,
- columns: SLICE = 0,
- distributions: str | SEQUENCE | None = None,
- show: INT | None = None,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "upper right",
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot column distributions.
-
- - For numerical columns, plot the probability density
- distribution. Additionally, it's possible to plot any of
- `scipy.stats` distributions fitted to the column.
- - For categorical columns, plot the class distribution.
- Only one categorical column can be plotted at the same time.
-
- !!! tip
- Use atom's [distribution][atomclassifier-distribution]
- method to check which distribution fits the column best.
-
- Parameters
- ----------
- columns: int, str, slice or sequence, default=0
- Columns to plot. I's only possible to plot one categorical
- column. If more than one categorical columns are selected,
- all categorical columns are ignored.
-
- distributions: str, sequence or None, default=None
- Names of the `scipy.stats` distributions to fit to the
- columns. If None, a [Gaussian kde distribution][kde] is
- showed. Only for numerical columns.
-
- show: int or None, default=None
- Number of classes (ordered by number of occurrences) to
- show in the plot. If None, it shows all classes. Only for
- categorical columns.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None: No title is shown.
- - If str: Text for the title.
- - If dict: [title configuration][parameters].
-
- legend: str, dict or None, default="upper right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the plot's type.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:DataPlot.plot_correlation
- atom.plots:DataPlot.plot_qq
- atom.plots:DataPlot.plot_relationships
-
- Examples
- --------
- ```pycon
- import numpy as np
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- # Add a categorical feature
- animals = ["cat", "dog", "bird", "lion", "zebra"]
- probabilities = [0.001, 0.1, 0.2, 0.3, 0.399]
- X["animals"] = np.random.choice(animals, size=len(X), p=probabilities)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.plot_distribution(columns=[0, 1])
- atom.plot_distribution(columns=0, distributions=["norm", "invgauss"])
- atom.plot_distribution(columns="animals")
- ```
-
- """
- columns = self.branch._get_columns(columns)
- cat_columns = list(self.dataset.select_dtypes(exclude="number").columns)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
-
- if len(columns) == 1 and columns[0] in cat_columns:
- series = self.dataset[columns[0]].value_counts(ascending=True)
-
- if show is None or show > len(series):
- show = len(series)
- elif show < 1:
- raise ValueError(
- "Invalid value for the show parameter."
- f"Value should be >0, got {show}."
- )
-
- color = BasePlot._fig.get_elem()
- fig.add_trace(
- go.Bar(
- x=series,
- y=series.index,
- orientation="h",
- marker=dict(
- color=f"rgba({color[4:-1]}, 0.2)",
- line=dict(width=2, color=color),
- ),
- hovertemplate="%{x}",
- name=f"{columns[0]}: {len(series)} classes",
- showlegend=BasePlot._fig.showlegend("dist", legend),
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Counts",
- ylim=(len(series) - show - 0.5, len(series) - 0.5),
- title=title,
- legend=legend,
- figsize=figsize or (900, 400 + show * 50),
- plotname="plot_distribution",
- filename=filename,
- display=display,
- )
-
- else:
- for col in [c for c in columns if c not in cat_columns]:
- fig.add_trace(
- go.Histogram(
- x=self.dataset[col],
- histnorm="probability density",
- marker=dict(
- color=f"rgba({BasePlot._fig.get_elem(col)[4:-1]}, 0.2)",
- line=dict(width=2, color=BasePlot._fig.get_elem(col)),
- ),
- nbinsx=40,
- name="dist",
- legendgroup=col,
- legendgrouptitle=dict(text=col, font_size=self.label_fontsize),
- showlegend=BasePlot._fig.showlegend(f"{col}-dist", legend),
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- x = np.linspace(self.dataset[col].min(), self.dataset[col].max(), 200)
-
- # Drop missing values for compatibility with scipy.stats
- missing = self.missing + [np.inf, -np.inf]
- values = self.dataset[col].replace(missing, np.NaN).dropna()
-
- if distributions:
- # Get a line for each distribution
- for j, dist in enumerate(lst(distributions)):
- params = getattr(stats, dist).fit(values)
-
- fig.add_trace(
- self._draw_line(
- x=x,
- y=getattr(stats, dist).pdf(x, *params),
- parent=col,
- child=dist,
- legend=legend,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
- else:
- # If no distributions specified, draw Gaussian kde
- fig.add_trace(
- self._draw_line(
- x=x,
- y=stats.gaussian_kde(values)(x),
- parent=col,
- child="kde",
- legend=legend,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- fig.update_layout(dict(barmode="overlay"))
-
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Values",
- ylabel="Probability density",
- title=title,
- legend=legend,
- figsize=figsize or (900, 600),
- plotname="plot_distribution",
- filename=filename,
- display=display,
- )
-
- @crash
- def plot_ngrams(
- self,
- ngram: INT | str = "bigram",
- index: SLICE | None = None,
- show: INT = 10,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "lower right",
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot n-gram frequencies.
-
- The text for the plot is extracted from the column named
- `corpus`. If there is no column with that name, an exception
- is raised. If the documents are not tokenized, the words are
- separated by spaces.
-
- !!! tip
- Use atom's [tokenize][atomclassifier-tokenize] method to
- separate the words creating n-grams based on their frequency
- in the corpus.
-
- Parameters
- ----------
- ngram: str or int, default="bigram"
- Number of contiguous words to search for (size of n-gram).
- Choose from: words (1), bigrams (2), trigrams (3),
- quadgrams (4).
-
- index: int, str, slice, sequence or None, default=None
- Documents in the corpus to include in the search. If None,
- it selects all documents in the dataset.
-
- show: int, default=10
- Number of n-grams (ordered by number of occurrences) to
- show in the plot.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="lower right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of n-grams shown.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:DataPlot.plot_wordcloud
-
- Examples
- --------
- ```pycon
- import numpy as np
- from atom import ATOMClassifier
- from sklearn.datasets import fetch_20newsgroups
-
- X, y = fetch_20newsgroups(
- return_X_y=True,
- categories=["alt.atheism", "sci.med", "comp.windows.x"],
- shuffle=True,
- random_state=1,
- )
- X = np.array(X).reshape(-1, 1)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.textclean()
- atom.textnormalize()
- atom.plot_ngrams()
- ```
-
- """
-
- def get_text(column: SERIES) -> SERIES:
- """Get the complete corpus as sequence of tokens.
-
- Parameters
- ----------
- column: series
- Column containing the corpus.
-
- Returns
- -------
- series
- Corpus of tokens.
-
- """
- if isinstance(column.iat[0], str):
- return column.apply(lambda row: row.split())
- else:
- return column
-
- corpus = get_corpus(self.X)
- rows = self.dataset.loc[self.branch._get_rows(index, return_test=False)]
-
- if str(ngram).lower() in ("1", "word", "words"):
- ngram = "words"
- series = pd.Series(
- [word for row in get_text(rows[corpus]) for word in row]
- ).value_counts(ascending=True)
- else:
- if str(ngram).lower() in ("2", "bigram", "bigrams"):
- ngram, finder = "bigrams", BigramCollocationFinder
- elif str(ngram).lower() in ("3", "trigram", "trigrams"):
- ngram, finder = "trigrams", TrigramCollocationFinder
- elif str(ngram).lower() in ("4", "quadgram", "quadgrams"):
- ngram, finder = "quadgrams", QuadgramCollocationFinder
- else:
- raise ValueError(
- f"Invalid value for the ngram parameter, got {ngram}. "
- "Choose from: words, bigram, trigram, quadgram."
- )
-
- ngram_fd = finder.from_documents(get_text(rows[corpus])).ngram_fd
- series = pd.Series(
- data=[x[1] for x in ngram_fd.items()],
- index=[" ".join(x[0]) for x in ngram_fd.items()],
- ).sort_values(ascending=True)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
-
- fig.add_trace(
- go.Bar(
- x=(data := series[-show:]),
- y=data.index,
- orientation="h",
- marker=dict(
- color=f"rgba({BasePlot._fig.get_elem(ngram)[4:-1]}, 0.2)",
- line=dict(width=2, color=BasePlot._fig.get_elem(ngram)),
- ),
- hovertemplate="%{x}",
- name=f"Total {ngram}: {len(series)}",
- legendgroup=ngram,
- showlegend=BasePlot._fig.showlegend(ngram, legend),
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Counts",
- title=title,
- legend=legend,
- figsize=figsize or (900, 400 + show * 50),
- plotname="plot_ngrams",
- filename=filename,
- display=display,
- )
-
- @crash
- def plot_qq(
- self,
- columns: SLICE = 0,
- distributions: str | SEQUENCE = "norm",
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "lower right",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot a quantile-quantile plot.
-
- Columns are distinguished by color and the distributions are
- distinguished by marker type. Missing values are ignored.
-
- Parameters
- ----------
- columns: int, str, slice or sequence, default=0
- Columns to plot. Selected categorical columns are ignored.
-
- distributions: str or sequence, default="norm"
- Names of the `scipy.stats` distributions to fit to the
- columns.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="lower right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:DataPlot.plot_correlation
- atom.plots:DataPlot.plot_distribution
- atom.plots:DataPlot.plot_relationships
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.plot_qq(columns=[5, 6])
- atom.plot_qq(columns=0, distributions=["norm", "invgauss", "triang"])
- ```
-
- """
- columns = self.branch._get_columns(columns)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
-
- percentiles = np.linspace(0, 100, 101)
- for col in columns:
- # Drop missing values for compatibility with scipy.stats
- missing = self.missing + [np.inf, -np.inf]
- values = self.dataset[col].replace(missing, np.NaN).dropna()
-
- for dist in lst(distributions):
- stat = getattr(stats, dist)
- params = stat.fit(values)
- samples = stat.rvs(*params, size=101, random_state=self.random_state)
-
- fig.add_trace(
- self._draw_line(
- x=np.percentile(samples, percentiles),
- y=np.percentile(values, percentiles),
- mode="markers",
- parent=col,
- child=dist,
- legend=legend,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- self._draw_straight_line(y="diagonal", xaxis=xaxis, yaxis=yaxis)
-
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Theoretical quantiles",
- ylabel="Observed quantiles",
- title=title,
- legend=legend,
- figsize=figsize or (900, 600),
- plotname="plot_qq",
- filename=filename,
- display=display,
- )
-
- @crash
- def plot_relationships(
- self,
- columns: slice | SEQUENCE = (0, 1, 2),
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] = (900, 900),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot pairwise relationships in a dataset.
-
- Creates a grid of axes such that each numerical column appears
- once on the x-axes and once on the y-axes. The bottom triangle
- contains scatter plots (max 250 random samples), the diagonal
- plots contain column distributions, and the upper triangle
- contains contour histograms for all samples in the columns.
-
- Parameters
- ----------
- columns: slice or sequence, default=(0, 1, 2)
- Columns to plot. Selected categorical columns are ignored.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple, default=(900, 900)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:DataPlot.plot_correlation
- atom.plots:DataPlot.plot_distribution
- atom.plots:DataPlot.plot_qq
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.plot_relationships(columns=[0, 4, 5])
- ```
-
- """
- columns = self.branch._get_columns(columns, only_numerical=True)
-
- # Use max 250 samples to not clutter the plot
- sample = lambda col: self.dataset[col].sample(
- n=min(len(self.dataset), 250), random_state=self.random_state
- )
-
- fig = self._get_figure()
- color = BasePlot._fig.get_elem()
- for i in range(len(columns)**2):
- x, y = i // len(columns), i % len(columns)
-
- # Calculate the distance between subplots
- offset = divide(0.0125, (len(columns) - 1))
-
- # Calculate the size of the subplot
- size = (1 - ((offset * 2) * (len(columns) - 1))) / len(columns)
-
- # Determine the position for the axes
- x_pos = y * (size + 2 * offset)
- y_pos = (len(columns) - x - 1) * (size + 2 * offset)
-
- xaxis, yaxis = BasePlot._fig.get_axes(
- x=(x_pos, rnd(x_pos + size)),
- y=(y_pos, rnd(y_pos + size)),
- coloraxis=dict(
- colorscale=PALETTE.get(color, "Blues"),
- cmin=0,
- cmax=len(self.dataset),
- showscale=False,
- )
- )
-
- if x == y:
- fig.add_trace(
- go.Histogram(
- x=self.dataset[columns[x]],
- marker=dict(
- color=f"rgba({color[4:-1]}, 0.2)",
- line=dict(width=2, color=color),
- ),
- name=columns[x],
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
- elif x > y:
- fig.add_trace(
- go.Scatter(
- x=sample(columns[y]),
- y=sample(columns[x]),
- mode="markers",
- marker=dict(color=color),
- hovertemplate="(%{x}, %{y})",
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
- elif y > x:
- fig.add_trace(
- go.Histogram2dContour(
- x=self.dataset[columns[y]],
- y=self.dataset[columns[x]],
- coloraxis=f"coloraxis{xaxis[1:]}",
- hovertemplate="x:%{x}
y:%{y}
z:%{z}",
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- if x < len(columns) - 1:
- fig.update_layout({f"xaxis{xaxis[1:]}_showticklabels": False})
- if y > 0:
- fig.update_layout({f"yaxis{yaxis[1:]}_showticklabels": False})
-
- self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel=columns[y] if x == len(columns) - 1 else None,
- ylabel=columns[x] if y == 0 else None,
- )
-
- return self._plot(
- title=title,
- legend=legend,
- figsize=figsize or (900, 900),
- plotname="plot_relationships",
- filename=filename,
- display=display,
- )
-
- @crash
- def plot_wordcloud(
- self,
- index: SLICE | None = None,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- **kwargs,
- ) -> go.Figure | None:
- """Plot a wordcloud from the corpus.
-
- The text for the plot is extracted from the column named
- `corpus`. If there is no column with that name, an exception
- is raised.
-
- Parameters
- ----------
- index: int, str, slice, sequence or None, default=None
- Documents in the corpus to include in the wordcloud. If
- None, it selects all documents in the dataset.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- **kwargs
- Additional keyword arguments for the [Wordcloud][] object.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:DataPlot.plot_ngrams
- atom.plots:PredictionPlot.plot_pipeline
-
- Examples
- --------
- ```pycon
- import numpy as np
- from atom import ATOMClassifier
- from sklearn.datasets import fetch_20newsgroups
-
- X, y = fetch_20newsgroups(
- return_X_y=True,
- categories=["alt.atheism", "sci.med", "comp.windows.x"],
- shuffle=True,
- random_state=1,
- )
- X = np.array(X).reshape(-1, 1)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.textclean()
- atom.textnormalize()
- atom.plot_wordcloud()
- ```
-
- """
-
- def get_text(column):
- """Get the complete corpus as one long string."""
- if isinstance(column.iat[0], str):
- return " ".join(column)
- else:
- return " ".join([" ".join(row) for row in column])
-
- check_dependency("wordcloud")
- from wordcloud import WordCloud
-
- corpus = get_corpus(self.X)
- rows = self.dataset.loc[self.branch._get_rows(index, return_test=False)]
-
- wordcloud = WordCloud(
- width=figsize[0],
- height=figsize[1],
- background_color=kwargs.pop("background_color", "white"),
- random_state=kwargs.pop("random_state", self.random_state),
- **kwargs,
- )
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
-
- fig.add_trace(
- go.Image(
- z=wordcloud.generate(get_text(rows[corpus])),
- hoverinfo="skip",
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- fig.update_layout(
- {
- f"xaxis{xaxis[1:]}_showticklabels": False,
- f"yaxis{xaxis[1:]}_showticklabels": False,
- }
- )
-
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- title=title,
- legend=legend,
- figsize=figsize or (900, 600),
- plotname="plot_wordcloud",
- filename=filename,
- display=display,
- )
-
-
-@typechecked
-class HTPlot(BasePlot):
- """Hyperparameter tuning plots.
-
- Plots that help interpret the model's study and corresponding
- trials. These plots are accessible from the runners or from the
- models. If called from a runner, the `models` parameter has to be
- specified (if None, uses all models). If called from a model, that
- model is used and the `models` parameter becomes unavailable.
-
- """
-
- @composed(crash, plot_from_model)
- def plot_edf(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- metric: INT | str | SEQUENCE | None = None,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "upper left",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the Empirical Distribution Function of a study.
-
- Use this plot to analyze and improve hyperparameter search
- spaces. The EDF assumes that the value of the objective
- function is in accordance with the uniform distribution over
- the objective space. This plot is only available for models
- that ran [hyperparameter tuning][].
-
- !!! note
- Only complete trials are considered when plotting the EDF.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models that used hyperparameter
- tuning are selected.
-
- metric: int, str, sequence or None, default=None
- Metric to plot (only for multi-metric runs). If str, add `+`
- between options to select more than one. If None, the metric
- used to run the pipeline is selected.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="upper left"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:HTPlot.plot_hyperparameters
- atom.plots:HTPlot.plot_trials
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from optuna.distributions import IntDistribution
- from sklearn.datasets import make_classification
-
- X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
-
- atom = ATOMClassifier(X, y, random_state=1)
-
- # Run three models with different search spaces
- atom.run(
- models="RF_1",
- n_trials=10,
- ht_params={"distributions": {"n_estimators": IntDistribution(6, 10)}},
- )
- atom.run(
- models="RF_2",
- n_trials=10,
- ht_params={"distributions": {"n_estimators": IntDistribution(11, 15)}},
- )
- atom.run(
- models="RF_3",
- n_trials=10,
- ht_params={"distributions": {"n_estimators": IntDistribution(16, 20)}},
- )
-
- atom.plot_edf()
- ```
-
- """
- models = check_hyperparams(models, "plot_edf")
- metric = self._get_metric(metric, max_one=False)
-
- values = []
- for m in models:
- values.append([])
- for met in metric:
- values[-1].append(np.array([lst(row)[met] for row in m.trials["score"]]))
-
- x_min = np.nanmin(np.array(values))
- x_max = np.nanmax(np.array(values))
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
- for m, val in zip(models, values):
- for met in metric:
- fig.add_trace(
- self._draw_line(
- x=(x := np.linspace(x_min, x_max, 100)),
- y=np.sum(val[met][:, np.newaxis] <= x, axis=0) / len(val[met]),
- parent=m.name,
- child=self._metric[met].name,
- legend=legend,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- ylim=(0, 1),
- xlabel="Score",
- ylabel="Cumulative Probability",
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_edf",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model)
- def plot_hyperparameter_importance(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- metric: int | str = 0,
- show: INT | None = None,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot a model's hyperparameter importance.
-
- The hyperparameter importance are calculated using the
- [fANOVA][] importance evaluator. The sum of importances for all
- parameters (per model) is 1. This plot is only available for
- models that ran [hyperparameter tuning][].
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models that used hyperparameter
- tuning are selected.
-
- metric: int or str, default=0
- Metric to plot (only for multi-metric runs).
-
- show: int or None, default=None
- Number of hyperparameters (ordered by importance) to show.
- None to show all.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of hyperparameters shown.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_feature_importance
- atom.plots:HTPlot.plot_hyperparameters
- atom.plots:HTPlot.plot_trials
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["ET", "RF"], n_trials=10)
- atom.plot_hyperparameter_importance()
- ```
-
- """
- models = check_hyperparams(models, "plot_hyperparameter_importance")
- params = len(set([k for m in lst(models) for k in m._ht["distributions"]]))
- met = self._get_metric(metric, max_one=True)
-
- if show is None or show > params:
- # Limit max features shown to avoid maximum figsize error
- show = min(200, params)
- elif show < 1:
- raise ValueError(
- f"Invalid value for the show parameter. Value should be >0, got {show}."
- )
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
- for m in models:
- importances = FanovaImportanceEvaluator(seed=self.random_state).evaluate(
- study=m.study,
- target=None if len(self._metric) == 1 else lambda x: x.values[met],
- )
-
- fig.add_trace(
- go.Bar(
- x=np.array(list(importances.values())) / sum(importances.values()),
- y=list(importances.keys()),
- orientation="h",
- marker=dict(
- color=f"rgba({BasePlot._fig.get_elem(m.name)[4:-1]}, 0.2)",
- line=dict(width=2, color=BasePlot._fig.get_elem(m.name)),
- ),
- hovertemplate="%{x}",
- name=m.name,
- legendgroup=m.name,
- showlegend=BasePlot._fig.showlegend(m.name, legend),
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- fig.update_layout(
- {
- f"yaxis{yaxis[1:]}": dict(categoryorder="total ascending"),
- "bargroupgap": 0.05,
- }
- )
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Normalized hyperparameter importance",
- ylim=(params - show - 0.5, params - 0.5),
- title=title,
- legend=legend,
- figsize=figsize or (900, 400 + show * 50),
- plotname="plot_hyperparameter_importance",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model(max_one=True))
- def plot_hyperparameters(
- self,
- models: INT | str | MODEL | None = None,
- params: str | slice | SEQUENCE = (0, 1),
- metric: int | str = 0,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot hyperparameter relationships in a study.
-
- A model's hyperparameters are plotted against each other. The
- corresponding metric scores are displayed in a contour plot.
- The markers are the trials in the study. This plot is only
- available for models that ran [hyperparameter tuning][].
-
- Parameters
- ----------
- models: int, str, Model or None, default=None
- Model to plot. If None, all models are selected. Note that
- leaving the default option could raise an exception if there
- are multiple models. To avoid this, call the plot directly
- from a model, e.g. `atom.lr.plot_hyperparameters()`.
-
- params: str, slice or sequence, default=(0, 1)
- Hyperparameters to plot. Use a sequence or add `+` between
- options to select more than one.
-
- metric: int or str, default=0
- Metric to plot (only for multi-metric runs).
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of hyperparameters shown.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:HTPlot.plot_hyperparameter_importance
- atom.plots:HTPlot.plot_parallel_coordinate
- atom.plots:HTPlot.plot_trials
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run("LR", n_trials=15)
- atom.plot_hyperparameters(params=(0, 1, 2))
- ```
-
- """
- m = check_hyperparams(models, "plot_hyperparameters")[0]
-
- if len(params := self._get_hyperparams(params, models)) < 2:
- raise ValueError(
- "Invalid value for the hyperparameters parameter. A minimum "
- f"of two parameters is required, got {len(params)}."
- )
-
- met = self._get_metric(metric, max_one=True)
-
- fig = self._get_figure()
- for i in range((length := len(params) - 1) ** 2):
- x, y = i // length, i % length
-
- if y <= x:
- # Calculate the size of the subplot
- size = 1 / length
-
- # Determine the position for the axes
- x_pos = y * size
- y_pos = (length - x - 1) * size
-
- xaxis, yaxis = BasePlot._fig.get_axes(
- x=(x_pos, rnd(x_pos + size)),
- y=(y_pos, rnd(y_pos + size)),
- coloraxis=dict(
- axes="99",
- colorscale=PALETTE.get(BasePlot._fig.get_elem(m.name), "Blues"),
- cmin=np.nanmin(
- m.trials.apply(lambda x: lst(x["score"])[met], axis=1)
- ),
- cmax=np.nanmax(
- m.trials.apply(lambda x: lst(x["score"])[met], axis=1)
- ),
- showscale=False,
- )
- )
-
- x_values = lambda row: row["params"].get(params[y], None)
- y_values = lambda row: row["params"].get(params[x + 1], None)
-
- fig.add_trace(
- go.Scatter(
- x=m.trials.apply(x_values, axis=1),
- y=m.trials.apply(y_values, axis=1),
- mode="markers",
- marker=dict(
- size=self.marker_size,
- color=BasePlot._fig.get_elem(m.name),
- line=dict(width=1, color="rgba(255, 255, 255, 0.9)"),
- ),
- customdata=list(
- zip(
- m.trials.index.tolist(),
- m.trials.apply(lambda x: lst(x["score"])[met], axis=1),
- )
- ),
- hovertemplate=(
- f"{params[y]}:%{{x}}
"
- f"{params[x + 1]}:%{{y}}
"
- f"{self._metric[met].name}:%{{customdata[1]:.4f}}"
- "Trial %{customdata[0]}"
- ),
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- fig.add_trace(
- go.Contour(
- x=m.trials.apply(x_values, axis=1),
- y=m.trials.apply(y_values, axis=1),
- z=m.trials.apply(lambda i: lst(i["score"])[met], axis=1),
- contours=dict(
- showlabels=True,
- labelfont=dict(size=self.tick_fontsize, color="white")
- ),
- coloraxis="coloraxis99",
- hoverinfo="skip",
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- if _is_log_scale(m.study.trials, params[y]):
- fig.update_layout({f"xaxis{xaxis[1:]}_type": "log"})
- if _is_log_scale(m.study.trials, params[x + 1]):
- fig.update_layout({f"yaxis{xaxis[1:]}_type": "log"})
-
- if x < length - 1:
- fig.update_layout({f"xaxis{xaxis[1:]}_showticklabels": False})
- if y > 0:
- fig.update_layout({f"yaxis{yaxis[1:]}_showticklabels": False})
-
- fig.update_layout(
- {
- "template": "plotly_white",
- f"xaxis{xaxis[1:]}_showgrid": False,
- f"yaxis{yaxis[1:]}_showgrid": False,
- f"xaxis{yaxis[1:]}_zeroline": False,
- f"yaxis{yaxis[1:]}_zeroline": False,
- }
- )
-
- self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel=params[y] if x == length - 1 else None,
- ylabel=params[x + 1] if y == 0 else None,
- )
-
- BasePlot._fig.used_models.append(m)
- return self._plot(
- title=title,
- legend=legend,
- figsize=figsize or (800 + 100 * length, 500 + 100 * length),
- plotname="plot_hyperparameters",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model(max_one=True))
- def plot_parallel_coordinate(
- self,
- models: INT | str | MODEL | None = None,
- params: str | slice | SEQUENCE | None = None,
- metric: INT | str = 0,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot high-dimensional parameter relationships in a study.
-
- Every line of the plot represents one trial. This plot is only
- available for models that ran [hyperparameter tuning][].
-
- Parameters
- ----------
- models: int, str, Model or None, default=None
- Model to plot. If None, all models are selected. Note that
- leaving the default option could raise an exception if there
- are multiple models. To avoid this, call the plot directly
- from a model, e.g. `atom.lr.plot_parallel_coordinate()`.
-
- params: str, slice, sequence or None, default=None
- Hyperparameters to plot. Use a sequence or add `+` between
- options to select more than one. If None, all the model's
- hyperparameters are selected.
-
- metric: int or str, default=0
- Metric to plot (only for multi-metric runs).
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of hyperparameters shown.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:HTPlot.plot_edf
- atom.plots:HTPlot.plot_hyperparameter_importance
- atom.plots:HTPlot.plot_hyperparameters
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run("RF", n_trials=15)
- atom.plot_parallel_coordinate(params=slice(1, 5))
- ```
-
- """
-
- def sort_mixed_types(values: list[str]) -> list[str]:
- """Sort a sequence of numbers and strings.
-
- Numbers are converted and take precedence over strings.
-
- Parameters
- ----------
- values: list
- Values to sort.
-
- Returns
- -------
- list of str
- Sorted values.
-
- """
- numbers, categorical = [], []
- for elem in values:
- try:
- numbers.append(it(float(elem)))
- except (TypeError, ValueError):
- categorical.append(str(elem))
-
- return list(map(str, sorted(numbers))) + sorted(categorical)
-
- m = check_hyperparams(models, "plot_parallel_coordinate")[0]
- params = self._get_hyperparams(params, models)
- met = self._get_metric(metric, max_one=True)
-
- dims = _get_dims_from_info(
- _get_parallel_coordinate_info(
- study=m.study,
- params=params,
- target=None if len(self._metric) == 1 else lambda x: x.values[met],
- target_name=self._metric[met].name,
- )
- )
-
- # Clean up dimensions for nicer view
- for d in [dims[0]] + sorted(dims[1:], key=lambda x: params.index(x["label"])):
- if "ticktext" in d:
- # Skip processing for logarithmic params
- if all(isinstance(i, INT_TYPES) for i in d["values"]):
- # Order categorical values
- mapping = [d["ticktext"][i] for i in d["values"]]
- d["ticktext"] = sort_mixed_types(d["ticktext"])
- d["values"] = [d["ticktext"].index(v) for v in mapping]
- else:
- # Round numerical values
- d["tickvals"] = list(
- map(rnd, np.linspace(min(d["values"]), max(d["values"]), 5))
- )
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes(
- coloraxis=dict(
- colorscale=PALETTE.get(BasePlot._fig.get_elem(m.name), "Blues"),
- cmin=min(dims[0]["values"]),
- cmax=max(dims[0]["values"]),
- title=self._metric[met].name,
- font_size=self.label_fontsize,
- )
- )
-
- fig.add_trace(
- go.Parcoords(
- dimensions=dims,
- line=dict(
- color=dims[0]["values"],
- coloraxis=f"coloraxis{xaxis[1:]}",
- ),
- unselected=dict(line=dict(color="gray", opacity=0.5)),
- labelside="bottom",
- labelfont=dict(size=self.label_fontsize),
- )
- )
-
- BasePlot._fig.used_models.append(m)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- title=title,
- legend=legend,
- figsize=figsize or (700 + len(params) * 50, 600),
- plotname="plot_parallel_coordinate",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model(max_one=True))
- def plot_pareto_front(
- self,
- models: INT | str | MODEL | None = None,
- metric: str | SEQUENCE | None = None,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the Pareto front of a study.
-
- Shows the trial scores plotted against each other. The marker's
- colors indicate the trial number. This plot is only available
- for models that ran [multi-metric runs][] with
- [hyperparameter tuning][].
-
- Parameters
- ----------
- models: int, str, Model or None, default=None
- Model to plot. If None, all models are selected. Note that
- leaving the default option could raise an exception if there
- are multiple models. To avoid this, call the plot directly
- from a model, e.g. `atom.lr.plot_pareto_front()`.
-
- metric: str, sequence or None, default=None
- Metrics to plot. Use a sequence or add `+` between options
- to select more than one. If None, the metrics used to run
- the pipeline are selected.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of metrics shown.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:HTPlot.plot_edf
- atom.plots:HTPlot.plot_slice
- atom.plots:HTPlot.plot_trials
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(
- models="RF",
- metric=["f1", "accuracy", "recall"],
- n_trials=15,
- )
- atom.plot_pareto_front()
- ```
-
- """
- m = check_hyperparams(models, "plot_pareto_front")[0]
-
- if len(metric := self._get_metric(metric, max_one=False)) < 2:
- raise ValueError(
- "Invalid value for the metric parameter. A minimum "
- f"of two metrics are required, got {len(metric)}."
- )
-
- fig = self._get_figure()
- for i in range((length := len(metric) - 1) ** 2):
- x, y = i // length, i % length
-
- if y <= x:
- # Calculate the distance between subplots
- offset = divide(0.0125, length - 1)
-
- # Calculate the size of the subplot
- size = (1 - ((offset * 2) * (length - 1))) / length
-
- # Determine the position for the axes
- x_pos = y * (size + 2 * offset)
- y_pos = (length - x - 1) * (size + 2 * offset)
-
- xaxis, yaxis = BasePlot._fig.get_axes(
- x=(x_pos, rnd(x_pos + size)),
- y=(y_pos, rnd(y_pos + size)),
- )
-
- fig.add_trace(
- go.Scatter(
- x=m.trials.apply(lambda row: row["score"][y], axis=1),
- y=m.trials.apply(lambda row: row["score"][x + 1], axis=1),
- mode="markers",
- marker=dict(
- size=self.marker_size,
- color=m.trials.index,
- colorscale="Teal",
- line=dict(width=1, color="rgba(255, 255, 255, 0.9)"),
- ),
- customdata=m.trials.index,
- hovertemplate="(%{x}, %{y})Trial %{customdata}",
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- if x < len(metric) - 1:
- fig.update_layout({f"xaxis{xaxis[1:]}_showticklabels": False})
- if y > 0:
- fig.update_layout({f"yaxis{yaxis[1:]}_showticklabels": False})
-
- self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel=self._metric[y].name if x == length - 1 else None,
- ylabel=self._metric[x + 1].name if y == 0 else None,
- )
-
- BasePlot._fig.used_models.append(m)
- return self._plot(
- title=title,
- legend=legend,
- figsize=figsize or (500 + 100 * length, 500 + 100 * length),
- plotname="plot_pareto_front",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model(max_one=True))
- def plot_slice(
- self,
- models: INT | str | MODEL | None = None,
- params: str | slice | SEQUENCE | None = None,
- metric: INT | str | SEQUENCE | None = None,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the parameter relationship in a study.
-
- The color of the markers indicate the trial. This plot is only
- available for models that ran [hyperparameter tuning][].
-
- Parameters
- ----------
- models: int, str, Model or None, default=None
- Model to plot. If None, all models are selected. Note that
- leaving the default option could raise an exception if there
- are multiple models. To avoid this, call the plot directly
- from a model, e.g. `atom.lr.plot_slice()`.
-
- params: str, slice, sequence or None, default=None
- Hyperparameters to plot. Use a sequence or add `+` between
- options to select more than one. If None, all the model's
- hyperparameters are selected.
-
- metric: int or str, default=None
- Metric to plot (only for multi-metric runs). If str, add `+`
- between options to select more than one. If None, the metric
- used to run the pipeline is selected.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of hyperparameters shown.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:HTPlot.plot_edf
- atom.plots:HTPlot.plot_hyperparameters
- atom.plots:HTPlot.plot_parallel_coordinate
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(
- models="RF",
- metric=["f1", "recall"],
- n_trials=15,
- )
- atom.plot_slice(params=(0, 1, 2))
- ```
-
- """
- m = check_hyperparams(models, "plot_slice")[0]
- params = self._get_hyperparams(params, models)
- metric = self._get_metric(metric, max_one=False)
-
- fig = self._get_figure()
- for i in range(len(params) * len(metric)):
- x, y = i // len(params), i % len(params)
-
- # Calculate the distance between subplots
- x_offset = divide(0.0125, (len(params) - 1))
- y_offset = divide(0.0125, (len(metric) - 1))
-
- # Calculate the size of the subplot
- x_size = (1 - ((x_offset * 2) * (len(params) - 1))) / len(params)
- y_size = (1 - ((y_offset * 2) * (len(metric) - 1))) / len(metric)
-
- # Determine the position for the axes
- x_pos = y * (x_size + 2 * x_offset)
- y_pos = (len(metric) - x - 1) * (y_size + 2 * y_offset)
-
- xaxis, yaxis = BasePlot._fig.get_axes(
- x=(x_pos, rnd(x_pos + x_size)),
- y=(y_pos, rnd(y_pos + y_size)),
- )
-
- fig.add_trace(
- go.Scatter(
- x=m.trials.apply(lambda r: r["params"].get(params[y], None), axis=1),
- y=m.trials.apply(lambda r: lst(r["score"])[x], axis=1),
- mode="markers",
- marker=dict(
- size=self.marker_size,
- color=m.trials.index,
- colorscale="Teal",
- line=dict(width=1, color="rgba(255, 255, 255, 0.9)"),
- ),
- customdata=m.trials.index,
- hovertemplate="(%{x}, %{y})Trial %{customdata}",
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- if _is_log_scale(m.study.trials, params[y]):
- fig.update_layout({f"xaxis{xaxis[1:]}_type": "log"})
-
- if x < len(metric) - 1:
- fig.update_layout({f"xaxis{xaxis[1:]}_showticklabels": False})
- if y > 0:
- fig.update_layout({f"yaxis{yaxis[1:]}_showticklabels": False})
-
- self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel=params[y] if x == len(metric) - 1 else None,
- ylabel=self._metric[x].name if y == 0 else None,
- )
-
- BasePlot._fig.used_models.append(m)
- return self._plot(
- title=title,
- legend=legend,
- figsize=figsize or (800 + 100 * len(params), 500 + 100 * len(metric)),
- plotname="plot_slice",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model)
- def plot_terminator_improvement(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "upper right",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the potentials for future objective improvement.
-
- This function visualizes the objective improvement potentials.
- It helps to determine whether you should continue the
- optimization or not. The evaluated error is also plotted. Note
- that this function may take some time to compute the improvement
- potentials. This plot is only available for models that ran
- [hyperparameter tuning][].
-
- !!! warning
- * The plot_terminator_improvement method is only available
- for models that ran [hyperparameter tuning][] using
- cross-validation, e.g. using `ht_params={'cv': 5}`.
- * This method can be slow. Results are cached to fasten
- repeated calls.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models that used hyperparameter
- tuning are selected.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="upper right",
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y)
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:HTPlot.plot_pareto_front
- atom.plots:HTPlot.plot_timeline
- atom.plots:HTPlot.plot_trials
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import make_classification
-
- X, y = make_classification(n_samples=100, flip_y=0.2, random_state=1)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run("RF", n_trials=10, ht_params={"cv": 5})
- atom.plot_terminator_improvement()
- ```
-
- """
- check_dependency("botorch")
-
- models = check_hyperparams(models, "plot_terminator_improvement")
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
- for m in models:
- if m._ht["cv"] > 1:
- info = self._memory.cache(_get_improvement_info)(m.study, get_error=True)
- else:
- raise ValueError(
- "The plot_terminator_improvement method is only available for "
- "models that ran hyperparameter tuning using cross-validation, "
- "e.g. using ht_params={'cv': 5}."
- )
-
- fig.add_trace(
- self._draw_line(
- x=m.trials.index,
- y=info.improvements,
- error_y=dict(type="data", array=info.errors),
- mode="markers+lines",
- parent=m.name,
- legend=legend,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Trial",
- ylabel="Terminator improvement",
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_terminator_improvement",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model)
- def plot_timeline(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "lower right",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the timeline of a study.
-
- This plot is only available for models that ran
- [hyperparameter tuning][].
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models that used hyperparameter
- tuning are selected.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="lower right",
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y)
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:HTPlot.plot_edf
- atom.plots:HTPlot.plot_slice
- atom.plots:HTPlot.plot_terminator_improvement
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from optuna.pruners import PatientPruner
- from sklearn.datasets import make_classification
-
- X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(
- models="LGB",
- n_trials=15,
- ht_params={"pruner": PatientPruner(None, patience=2)},
- )
- atom.plot_timeline()
- ```
-
- """
- models = check_hyperparams(models, "plot_timeline")
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
-
- _cm = {
- "COMPLETE": BasePlot._fig._palette[0], # Main color
- "FAIL": "rgb(255, 0, 0)", # Red
- "PRUNED": "rgb(255, 165, 0)", # Orange
- "RUNNING": "rgb(124, 252, 0)", # Green
- "WAITING": "rgb(220, 220, 220)", # Gray
- }
-
- for m in models:
- info = []
- for trial in m.study.get_trials(deepcopy=False):
- date_complete = trial.datetime_complete or datetime.now()
- date_start = trial.datetime_start or date_complete
-
- # Create nice representation of scores and params for hover
- s = [f'{m}: {trial.values[i]}' for i, m in enumerate(self._metric.keys())]
- p = [f" --> {k}: {v}" for k, v in trial.params.items()]
-
- info.append(
- Bunch(
- number=trial.number,
- start=date_start,
- duration=1000 * (date_complete - date_start).total_seconds(),
- state=trial.state,
- hovertext=(
- f"Trial: {trial.number}
"
- f"{'
'.join(s)}"
- f"Parameters:
{'
'.join(p)}"
- )
- )
- )
-
- for state in sorted(TrialState, key=lambda x: x.name):
- if bars := list(filter(lambda x: x.state == state, info)):
- fig.add_trace(
- go.Bar(
- name=state.name,
- x=[b.duration for b in bars],
- y=[b.number for b in bars],
- base=[b.start.isoformat() for b in bars],
- text=[b.hovertext for b in bars],
- textposition="none",
- hovertemplate=f"%{{text}}{m.name}",
- orientation="h",
- marker=dict(
- color=f"rgba({_cm[state.name][4:-1]}, 0.2)",
- line=dict(width=2, color=_cm[state.name]),
- ),
- showlegend=BasePlot._fig.showlegend(_cm[state.name], legend),
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- fig.update_layout({f"xaxis{yaxis[1:]}_type": "date", "barmode": "group"})
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Datetime",
- ylabel="Trial",
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_timeline",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model)
- def plot_trials(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- metric: INT | str | SEQUENCE | None = None,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "upper left",
- figsize: tuple[INT, INT] = (900, 800),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the hyperparameter tuning trials.
-
- Creates a figure with two plots: the first plot shows the score
- of every trial and the second shows the distance between the
- last consecutive steps. The best trial is indicated with a star.
- This is the same plot as produced by `ht_params={"plot": True}`.
- This plot is only available for models that ran
- [hyperparameter tuning][].
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models that used hyperparameter
- tuning are selected.
-
- metric: int, str, sequence or None, default=None
- Metric to plot (only for multi-metric runs). Add `+` between
- options to select more than one. If None, all metrics are
- selected.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="upper left"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 800)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_evals
- atom.plots:HTPlot.plot_hyperparameters
- atom.plots:PredictionPlot.plot_results
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import make_classification
-
- X, y = make_classification(n_samples=100, flip_y=0.2, random_state=1)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["ET", "RF"], n_trials=15)
- atom.plot_trials()
- ```
-
- """
- models = check_hyperparams(models, "plot_trials")
- metric = self._get_metric(metric, max_one=False)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes(y=(0.31, 1.0))
- xaxis2, yaxis2 = BasePlot._fig.get_axes(y=(0.0, 0.29))
- for m in models:
- for met in metric:
- y = m.trials["score"].apply(lambda value: lst(value)[met])
-
- # Create star symbol at best trial
- symbols = ["circle"] * len(y)
- symbols[m.best_trial.number] = "star"
- sizes = [self.marker_size] * len(y)
- sizes[m.best_trial.number] = self.marker_size * 1.5
-
- fig.add_trace(
- self._draw_line(
- x=list(range(len(y))),
- y=y,
- mode="lines+markers",
- marker_symbol=symbols,
- marker_size=sizes,
- hovertemplate=None,
- parent=m.name,
- child=self._metric[met].name,
- legend=legend,
- xaxis=xaxis2,
- yaxis=yaxis,
- )
- )
-
- fig.add_trace(
- self._draw_line(
- x=list(range(1, len(y))),
- y=np.abs(np.diff(y)),
- mode="lines+markers",
- marker_symbol="circle",
- parent=m.name,
- child=self._metric[met].name,
- legend=legend,
- xaxis=xaxis2,
- yaxis=yaxis2,
- )
- )
-
- fig.update_layout(
- {
- f"yaxis{yaxis[1:]}_anchor": f"x{xaxis2[1:]}",
- f"xaxis{xaxis[1:]}_showticklabels": False,
- "hovermode": "x unified",
- },
- )
-
- self._plot(
- ax=(f"xaxis{xaxis2[1:]}", f"yaxis{yaxis2[1:]}"),
- xlabel="Trial",
- ylabel="d",
- )
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- groupclick="togglegroup",
- ylabel="Score",
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_trials",
- filename=filename,
- display=display,
- )
-
-
-@typechecked
-class PredictionPlot(BasePlot):
- """Prediction plots.
-
- Plots that use the model's predictions. These plots are accessible
- from the runners or from the models. If called from a runner, the
- `models` parameter has to be specified (if None, uses all models).
- If called from a model, that model is used and the `models` parameter
- becomes unavailable.
-
- """
-
- @available_if(has_task(["binary", "multilabel"]))
- @composed(crash, plot_from_model)
- def plot_calibration(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- dataset: str | SEQUENCE = "test",
- n_bins: INT = 10,
- target: INT | str = 0,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "upper left",
- figsize: tuple[INT, INT] = (900, 900),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the calibration curve for a binary classifier.
-
- Well calibrated classifiers are probabilistic classifiers for
- which the output of the `predict_proba` method can be directly
- interpreted as a confidence level. For instance a well
- calibrated (binary) classifier should classify the samples such
- that among the samples to which it gave a `predict_proba` value
- close to 0.8, approx. 80% actually belong to the positive class.
- Read more in sklearn's [documentation][calibration].
-
- This figure shows two plots: the calibration curve, where the
- x-axis represents the average predicted probability in each bin
- and the y-axis is the fraction of positives, i.e. the proportion
- of samples whose class is the positive class (in each bin); and
- a distribution of all predicted probabilities of the classifier.
- This plot is available only for models with a `predict_proba`
- method in a binary or [multilabel][] classification task.
-
- !!! tip
- Use the [calibrate][adaboost-calibrate] method to calibrate
- the winning model.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- dataset: str or sequence, default="test"
- Data set on which to calculate the metric. Use a sequence
- or add `+` between options to select more than one. Choose
- from: "train", "test" or "holdout".
-
- target: int or str, default=0
- Target column to look at. Only for [multilabel][] tasks.
-
- n_bins: int, default=10
- Number of bins used for calibration. Minimum of 5 required.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="upper left"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 900)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_lift
- atom.plots:PredictionPlot.plot_prc
- atom.plots:PredictionPlot.plot_roc
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import make_classification
-
- X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["RF", "LGB"])
- atom.plot_calibration()
- ```
-
- """
- check_predict_proba(models, "plot_calibration")
- dataset = self._get_set(dataset, max_one=False)
- target = self.branch._get_target(target, only_columns=True)
-
- if n_bins < 5:
- raise ValueError(
- "Invalid value for the n_bins parameter."
- f"Value should be >=5, got {n_bins}."
- )
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes(y=(0.31, 1.0))
- xaxis2, yaxis2 = BasePlot._fig.get_axes(y=(0.0, 0.29))
- for m in models:
- for ds in dataset:
- y_true, y_pred = m._get_pred(ds, target, attr="predict_proba")
-
- # Get calibration (frac of positives and predicted values)
- frac_pos, pred = calibration_curve(y_true, y_pred, n_bins=n_bins)
-
- fig.add_trace(
- self._draw_line(
- x=pred,
- y=frac_pos,
- parent=m.name,
- child=ds,
- mode="lines+markers",
- marker_symbol="circle",
- legend=legend,
- xaxis=xaxis2,
- yaxis=yaxis,
- )
- )
-
- fig.add_trace(
- go.Histogram(
- x=y_pred,
- xbins=dict(start=0, end=1, size=1. / n_bins),
- marker=dict(
- color=f"rgba({BasePlot._fig.get_elem(m.name)[4:-1]}, 0.2)",
- line=dict(width=2, color=BasePlot._fig.get_elem(m.name)),
- ),
- name=m.name,
- legendgroup=m.name,
- showlegend=False,
- xaxis=xaxis2,
- yaxis=yaxis2,
- )
- )
-
- self._draw_straight_line(y="diagonal", xaxis=xaxis2, yaxis=yaxis)
-
- fig.update_layout(
- {
- f"yaxis{yaxis[1:]}_anchor": f"x{xaxis2[1:]}",
- f"xaxis{xaxis2[1:]}_showgrid": True,
- "barmode": "overlay",
- }
- )
-
- self._plot(
- ax=(f"xaxis{xaxis2[1:]}", f"yaxis{yaxis2[1:]}"),
- xlabel="Predicted value",
- ylabel="Count",
- xlim=(0, 1),
- )
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- groupclick="togglegroup",
- ylabel="Fraction of positives",
- ylim=(-0.05, 1.05),
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_calibration",
- filename=filename,
- display=display,
- )
-
- @available_if(has_task("class"))
- @composed(crash, plot_from_model)
- def plot_confusion_matrix(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- dataset: str = "test",
- target: INT | str = 0,
- threshold: FLOAT = 0.5,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "upper right",
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot a model's confusion matrix.
-
- For one model, the plot shows a heatmap. For multiple models,
- it compares TP, FP, FN and TN in a barplot (not implemented
- for multiclass classification tasks). This plot is available
- only for classification tasks.
-
- !!! tip
- Fill the `threshold` parameter with the result from the
- model's `get_best_threshold` method to optimize the results.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- dataset: str, default="test"
- Data set on which to calculate the confusion matrix. Choose
- from:` "train", "test" or "holdout".
-
- target: int or str, default=0
- Target column to look at. Only for [multioutput tasks][].
-
- threshold: float, default=0.5
- Threshold between 0 and 1 to convert predicted probabilities
- to class labels. Only for binary classification tasks.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="upper right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the plot's type.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_calibration
- atom.plots:PredictionPlot.plot_threshold
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import make_classification
-
- X, y = make_classification(n_samples=100, flip_y=0.2, random_state=1)
-
- atom = ATOMClassifier(X, y, test_size=0.4)
- atom.run(["LR", "RF"])
- atom.lr.plot_confusion_matrix() # For one model
- atom.plot_confusion_matrix() # For multiple models
- ```
-
- """
- ds = self._get_set(dataset, max_one=True)
- target = self.branch._get_target(target, only_columns=True)
-
- if self.task.startswith("multiclass") and len(models) > 1:
- raise NotImplementedError(
- "The plot_confusion_matrix method does not support "
- "the comparison of multiple models for multiclass "
- "or multiclass-multioutput classification tasks."
- )
-
- labels = np.array(
- (("True negatives", "False positives"), ("False negatives", "True positives"))
- )
-
- fig = self._get_figure()
- if len(models) == 1:
- xaxis, yaxis = BasePlot._fig.get_axes(
- x=(0, 0.87),
- coloraxis=dict(
- colorscale="Blues",
- cmin=0,
- cmax=100,
- title="Percentage of samples",
- font_size=self.label_fontsize,
- ),
- )
- else:
- xaxis, yaxis = BasePlot._fig.get_axes()
-
- for m in models:
- y_true, y_pred = m._get_pred(ds, target, attr="predict")
- if threshold != 0.5:
- y_pred = (y_pred > threshold).astype("int")
-
- cm = confusion_matrix(y_true, y_pred)
- if len(models) == 1: # Create matrix heatmap
- ticks = m.mapping.get(target, np.unique(m.dataset[target]).astype(str))
- xaxis, yaxis = BasePlot._fig.get_axes(
- x=(0, 0.87),
- coloraxis=dict(
- colorscale="Blues",
- cmin=0,
- cmax=100,
- title="Percentage of samples",
- font_size=self.label_fontsize,
- ),
- )
-
- fig.add_trace(
- go.Heatmap(
- x=ticks,
- y=ticks,
- z=100. * cm / cm.sum(axis=1)[:, np.newaxis],
- coloraxis=f"coloraxis{xaxis[1:]}",
- text=cm,
- customdata=labels,
-                        texttemplate="%{text}<br>(%{z:.2f}%)",
- textfont=dict(size=self.label_fontsize),
-                        hovertemplate=(
-                            "%{customdata}<br>" if is_binary(self.task) else ""
-                            "x:%{x}<br>y:%{y}<br>z:%{z}"
-                        ),
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- fig.update_layout(
- {
- "template": "plotly_white",
- f"yaxis{yaxis[1:]}_autorange": "reversed",
- f"xaxis{xaxis[1:]}_showgrid": False,
- f"yaxis{yaxis[1:]}_showgrid": False,
- }
- )
-
- else:
- color = BasePlot._fig.get_elem(m.name)
- fig.add_trace(
- go.Bar(
- x=cm.ravel(),
- y=labels.ravel(),
- orientation="h",
- marker=dict(
- color=f"rgba({color[4:-1]}, 0.2)",
- line=dict(width=2, color=color),
- ),
- hovertemplate="%{x}",
- name=m.name,
- legendgroup=m.name,
- showlegend=BasePlot._fig.showlegend(m.name, legend),
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- fig.update_layout(bargroupgap=0.05)
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Predicted label" if len(models) == 1 else "Count",
- ylabel="True label" if len(models) == 1 else None,
- title=title,
- legend=legend,
- figsize=figsize or ((800, 800) if len(models) == 1 else (900, 600)),
- plotname="plot_confusion_matrix",
- filename=filename,
- display=display,
- )
-
- @available_if(has_task(["binary", "multilabel"]))
- @composed(crash, plot_from_model)
- def plot_det(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- dataset: str | SEQUENCE = "test",
- target: INT | str = 0,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "upper right",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ):
- """Plot the Detection Error Tradeoff curve.
-
- Read more about [DET][] in sklearn's documentation. Only
- available for binary classification tasks.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- dataset: str or sequence, default="test"
- Data set on which to calculate the metric. Use a sequence
- or add `+` between options to select more than one. Choose
- from: "train", "test" or "holdout".
-
- target: int or str, default=0
- Target column to look at. Only for [multilabel][] tasks.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="upper right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_gains
- atom.plots:PredictionPlot.plot_roc
- atom.plots:PredictionPlot.plot_prc
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import make_classification
-
- X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["LR", "RF"])
- atom.plot_det()
- ```
-
- """
- dataset = self._get_set(dataset, max_one=False)
- target = self.branch._get_target(target, only_columns=True)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
- for m in models:
- for ds in dataset:
- # Get fpr-fnr pairs for different thresholds
- fpr, fnr, _ = det_curve(*m._get_pred(ds, target, attr="thresh"))
-
- fig.add_trace(
- self._draw_line(
- x=fpr,
- y=fnr,
- mode="lines",
- parent=m.name,
- child=ds,
- legend=legend,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="FPR",
- ylabel="FNR",
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_det",
- filename=filename,
- display=display,
- )
-
- @available_if(has_task("reg"))
- @composed(crash, plot_from_model)
- def plot_errors(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- dataset: str = "test",
- target: INT | str = 0,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "lower right",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot a model's prediction errors.
-
- Plot the actual targets from a set against the predicted values
- generated by the regressor. A linear fit is made on the data.
- The gray, intersected line shows the identity line. This plot
- can be useful to detect noise or heteroscedasticity along a
- range of the target domain. This plot is available only for
- regression tasks.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- dataset: str, default="test"
- Data set on which to calculate the metric. Choose from:
- "train", "test" or "holdout".
-
- target: int or str, default=0
- Target column to look at. Only for [multioutput tasks][].
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="lower right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_residuals
-
- Examples
- --------
- ```pycon
- from atom import ATOMRegressor
- from sklearn.datasets import load_diabetes
-
- X, y = load_diabetes(return_X_y=True, as_frame=True)
-
- atom = ATOMRegressor(X, y)
- atom.run(["OLS", "LGB"])
- atom.plot_errors()
- ```
-
- """
- ds = self._get_set(dataset, max_one=True)
- target = self.branch._get_target(target, only_columns=True)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
- for m in models:
- y_true, y_pred = m._get_pred(ds, target)
-
- fig.add_trace(
- go.Scatter(
- x=y_true,
- y=y_pred,
- mode="markers",
- line=dict(width=2, color=BasePlot._fig.get_elem(m.name)),
- name=m.name,
- legendgroup=m.name,
- showlegend=BasePlot._fig.showlegend(m.name, legend),
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- # Fit the points using linear regression
- from atom.models import OrdinaryLeastSquares
- model = OrdinaryLeastSquares(goal=self.goal, branch=m.branch)._get_est()
- model.fit(y_true.values.reshape(-1, 1), y_pred)
-
- fig.add_trace(
- go.Scatter(
- x=(x := np.linspace(y_true.min(), y_true.max(), 100)),
- y=model.predict(x[:, np.newaxis]),
- mode="lines",
- line=dict(width=2, color=BasePlot._fig.get_elem(m.name)),
- hovertemplate="(%{x}, %{y})",
- legendgroup=m.name,
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- self._draw_straight_line(y="diagonal", xaxis=xaxis, yaxis=yaxis)
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- groupclick="togglegroup",
- xlabel="True value",
- title=title,
- legend=legend,
- ylabel="Predicted value",
- figsize=figsize,
- plotname="plot_errors",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model(ensembles=False))
- def plot_evals(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- dataset: str | SEQUENCE = "test",
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "lower right",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot evaluation curves.
-
- The evaluation curves are the main metric scores achieved by the
- models at every iteration of the training process. This plot is
- available only for models that allow [in-training validation][].
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- dataset: str or sequence, default="test"
- Data set on which to calculate the evaluation curves. Use a
- sequence or add `+` between options to select more than one.
- Choose from: "train" or "test".
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="lower right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:HTPlot.plot_trials
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import make_classification
-
- X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["XGB", "LGB"])
- atom.plot_evals()
- ```
-
- """
- dataset = self._get_set(dataset, max_one=False, allow_holdout=False)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
- for m in models:
- if not m.evals:
- raise ValueError(
- "Invalid value for the models parameter. Model "
- f"{m.name} has no in-training validation."
- )
-
- for ds in dataset:
- fig.add_trace(
- self._draw_line(
- x=list(range(len(m.evals[f"{self._metric[0].name}_{ds}"]))),
- y=m.evals[f"{self._metric[0].name}_{ds}"],
- marker_symbol="circle",
- parent=m.name,
- child=ds,
- legend=legend,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- BasePlot._fig.used_models.append(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Iterations",
- ylabel=self._metric[0].name,
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_evals",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model)
- def plot_feature_importance(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- show: INT | None = None,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "lower right",
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot a model's feature importance.
-
- The sum of importances for all features (per model) is 1.
- This plot is available only for models whose estimator has
- a `scores_`, `feature_importances_` or `coef` attribute.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- show: int or None, default=None
- Number of features (ordered by importance) to show. If
- None, it shows all features.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="lower right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of features shown.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_parshap
- atom.plots:PredictionPlot.plot_partial_dependence
- atom.plots:PredictionPlot.plot_permutation_importance
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["LR", "RF"])
- atom.plot_feature_importance(show=10)
- ```
-
- """
- show = self._get_show(show, models)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
- for m in models:
- if (fi := m.feature_importance) is None:
- raise ValueError(
- "Invalid value for the models parameter. The estimator "
- f"{m.estimator.__class__.__name__} has no feature_importances_ "
- "nor coef_ attribute."
- )
-
- fig.add_trace(
- go.Bar(
- x=fi,
- y=fi.index,
- orientation="h",
- marker=dict(
- color=f"rgba({BasePlot._fig.get_elem(m.name)[4:-1]}, 0.2)",
- line=dict(width=2, color=BasePlot._fig.get_elem(m.name)),
- ),
- hovertemplate="%{x}",
- name=m.name,
- legendgroup=m.name,
- showlegend=BasePlot._fig.showlegend(m.name, legend),
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- fig.update_layout(
- {
- f"yaxis{yaxis[1:]}": dict(categoryorder="total ascending"),
- "bargroupgap": 0.05,
- }
- )
-
- # Unique number of features over all branches
- n_fxs = len(set([fx for m in models for fx in m.features]))
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Normalized feature importance",
- ylim=(n_fxs - show - 0.5, n_fxs - 0.5),
- title=title,
- legend=legend,
- figsize=figsize or (900, 400 + show * 50),
- plotname="plot_feature_importance",
- filename=filename,
- display=display,
- )
-
- @available_if(has_task("forecast"))
- @composed(crash, plot_from_model(check_fitted=False))
- def plot_forecast(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- fh: int | str | range | SEQUENCE | ForecastingHorizon = "test",
- X: FEATURES | None = None,
- target: INT | str = 0,
- plot_interval: bool = True,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "upper left",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot a time series with model forecasts.
-
- This plot is only available for forecasting tasks.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected. If no
- models are selected, only the target column is plotted.
-
- fh: int, str, range, sequence or [ForecastingHorizon][], default="test"
- Forecast horizon for which to plot the predictions. If
- string, choose from: "train", "test" or "holdout". Use a
- sequence or add `+` between options to select more than one.
-
- X: dataframe-like or None, default=None
- Exogenous time series corresponding to fh. This parameter
- is ignored if fh is a data set.
-
- target: int or str, default=0
- Target column to look at. Only for [multivariate][] tasks.
-
- plot_interval: bool, default=True
- Whether to plot prediction intervals instead of the exact
- prediction values. If True, the plotted estimators should
- have a `predict_interval` method.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="upper left"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_lift
- atom.plots:PredictionPlot.plot_prc
- atom.plots:PredictionPlot.plot_roc
-
- Examples
- --------
- ```pycon
- from atom import ATOMForecaster
- from sktime.datasets import load_airline
-
- y = load_airline()
-
- atom = ATOMForecaster(y, random_state=1)
- atom.plot_forecast()
- atom.run(
- models="arima",
- est_params={"order": (1, 1, 0), "seasonal_order": (0, 1, 0, 12)},
- )
- atom.plot_forecast()
- atom.plot_forecast(fh="train+test", plot_interval=False)
-
- # Forecast the next 4 years starting from the test set
- atom.plot_forecast(fh=range(1, 48))
- ```
-
- """
- target = self.branch._get_target(target, only_columns=True)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
-
- # Draw original time series
- for ds in ("train", "test"):
- fig.add_trace(
- go.Scatter(
- x=self._get_plot_index(getattr(self, ds)),
- y=getattr(self, ds)[target],
- mode="lines+markers",
- line=dict(
- width=2,
- color="black",
- dash=BasePlot._fig.get_elem(ds, "dash"),
- ),
- opacity=0.6,
- name=ds,
- showlegend=False if models else BasePlot._fig.showlegend(ds, legend),
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- # Draw predictions
- for m in models:
- if isinstance(fh, str):
- # Get fh and corresponding X from data set
- datasets = self._get_set(fh, max_one=False)
- fh = bk.concat([getattr(m, ds) for ds in datasets]).index
- X = m.X.loc[fh]
-
- y_pred = m.predict(fh, X)
- if is_multioutput(self.task):
- y_pred = y_pred[target]
-
- fig.add_trace(
- self._draw_line(
- x=self._get_plot_index(y_pred),
- y=y_pred,
- mode="lines+markers",
- parent=m.name,
- legend=legend,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- if plot_interval:
- try:
- y_pred = m.predict_interval(fh, X)
- except NotImplementedError:
- continue # Fails for some models like ES
-
- if is_multioutput(self.task):
- # Select interval of target column for multivariate
- y = y_pred.iloc[:, y_pred.columns.get_loc(target)]
- else:
- y = y_pred # Univariate
-
- fig.add_traces(
- [
- go.Scatter(
- x=self._get_plot_index(y_pred),
- y=y.iloc[:, 1],
- mode="lines",
- line=dict(width=1, color=BasePlot._fig.get_elem(m.name)),
-                            hovertemplate=f"%{{y}}<extra>{m.name} - upper bound</extra>",
- legendgroup=m.name,
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- ),
- go.Scatter(
- x=self._get_plot_index(y_pred),
- y=y.iloc[:, 0],
- mode="lines",
- line=dict(width=1, color=BasePlot._fig.get_elem(m.name)),
- fill="tonexty",
- fillcolor=f"rgba{BasePlot._fig.get_elem(m.name)[3:-1]}, 0.2)",
-                            hovertemplate=f"%{{y}}<extra>{m.name} - lower bound</extra>",
- legendgroup=m.name,
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- ]
- )
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- groupclick="togglegroup" if plot_interval else "toggleitem",
- xlabel=self.y.index.name,
- ylabel=target,
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_forecast",
- filename=filename,
- display=display,
- )
-
- @available_if(has_task(["binary", "multilabel"]))
- @composed(crash, plot_from_model)
- def plot_gains(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- dataset: str | SEQUENCE = "test",
- target: INT | str = 0,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "lower right",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the cumulative gains curve.
-
- This plot is available only for binary and [multilabel][]
- classification tasks.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- dataset: str or sequence, default="test"
- Data set on which to calculate the metric. Use a sequence
- or add `+` between options to select more than one. Choose
- from: "train", "test" or "holdout".
-
- target: int or str, default=0
- Target column to look at. Only for [multilabel][] tasks.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="lower right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_det
- atom.plots:PredictionPlot.plot_lift
- atom.plots:PredictionPlot.plot_roc
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import make_classification
-
- X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["LR", "RF"])
- atom.plot_gains()
- ```
-
- """
- dataset = self._get_set(dataset, max_one=False)
- target = self.branch._get_target(target, only_columns=True)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
- for m in models:
- for ds in dataset:
- y_true, y_pred = m._get_pred(ds, target, attr="thresh")
-
- fig.add_trace(
- self._draw_line(
- x=np.arange(start=1, stop=len(y_true) + 1) / len(y_true),
- y=np.cumsum(y_true.iloc[np.argsort(y_pred)[::-1]]) / y_true.sum(),
- mode="lines",
- parent=m.name,
- child=ds,
- legend=legend,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- self._draw_straight_line(y="diagonal", xaxis=xaxis, yaxis=yaxis)
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Fraction of sample",
- ylabel="Gain",
- xlim=(0, 1),
- ylim=(0, 1.02),
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_gains",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model(ensembles=False))
- def plot_learning_curve(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- metric: INT | str | SEQUENCE | None = None,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "lower right",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the learning curve: score vs number of training samples.
-
- This plot is available only for models fitted using
- [train sizing][]. [Ensembles][] are ignored.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- metric: int, str, sequence or None, default=None
- Metric to plot (only for multi-metric runs). Use a sequence
- or add `+` between options to select more than one. If None,
- the metric used to run the pipeline is selected.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="lower right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_results
- atom.plots:PredictionPlot.plot_successive_halving
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.train_sizing(["LR", "RF"], n_bootstrap=5)
- atom.plot_learning_curve()
- ```
-
- """
- metric = self._get_metric(metric, max_one=False)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
-
- for met in metric:
- x, y, std = defaultdict(list), defaultdict(list), defaultdict(list)
- for m in models:
- x[m._group].append(m._train_idx)
- y[m._group].append(get_best_score(m, met))
- if m.bootstrap is not None:
- std[m._group].append(m.bootstrap.iloc[:, met].std())
-
- for group in x:
- fig.add_trace(
- self._draw_line(
- x=x[group],
- y=y[group],
- mode="lines+markers",
- marker_symbol="circle",
- error_y=dict(type="data", array=std[group], visible=True),
- parent=group,
- child=self._metric[met].name,
- legend=legend,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- # Add error bands
- if m.bootstrap is not None:
- fillcolor = f"rgba{BasePlot._fig.get_elem(group)[3:-1]}, 0.2)"
- fig.add_traces(
- [
- go.Scatter(
- x=x[group],
- y=np.add(y[group], std[group]),
- mode="lines",
- line=dict(width=1, color=BasePlot._fig.get_elem(group)),
-                                hovertemplate="%{y}<extra>upper bound</extra>",
- legendgroup=group,
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- ),
- go.Scatter(
- x=x[group],
- y=np.subtract(y[group], std[group]),
- mode="lines",
- line=dict(width=1, color=BasePlot._fig.get_elem(group)),
- fill="tonexty",
- fillcolor=fillcolor,
-                                hovertemplate="%{y}<extra>lower bound</extra>",
- legendgroup=group,
- showlegend=False,
- xaxis=xaxis,
- yaxis=yaxis,
- ),
- ]
- )
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- groupclick="togglegroup",
- title=title,
- legend=legend,
- xlabel="Number of training samples",
- ylabel="Score",
- figsize=figsize,
- plotname="plot_learning_curve",
- filename=filename,
- display=display,
- )
-
- @available_if(has_task(["binary", "multilabel"]))
- @composed(crash, plot_from_model)
- def plot_lift(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- dataset: str | SEQUENCE = "test",
- target: INT | str = 0,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "upper right",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the lift curve.
-
- Only available for binary classification tasks.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- dataset: str or sequence, default="test"
- Data set on which to calculate the metric. Use a sequence
- or add `+` between options to select more than one. Choose
- from: "train", "test" or "holdout".
-
- target: int or str, default=0
- Target column to look at. Only for [multilabel][] tasks.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="upper right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_det
- atom.plots:PredictionPlot.plot_gains
- atom.plots:PredictionPlot.plot_prc
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import make_classification
-
- X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["LR", "RF"])
- atom.plot_lift()
- ```
-
- """
- dataset = self._get_set(dataset, max_one=False)
- target = self.branch._get_target(target, only_columns=True)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
- for m in models:
- for ds in dataset:
- y_true, y_pred = m._get_pred(ds, target, attr="thresh")
-
- gains = np.cumsum(y_true.iloc[np.argsort(y_pred)[::-1]]) / y_true.sum()
- fig.add_trace(
- self._draw_line(
- x=(x := np.arange(start=1, stop=len(y_true) + 1) / len(y_true)),
- y=gains / x,
- mode="lines",
- parent=m.name,
- child=ds,
- legend=legend,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- self._draw_straight_line(y=1, xaxis=xaxis, yaxis=yaxis)
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Fraction of sample",
- ylabel="Lift",
- xlim=(0, 1),
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_lift",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model)
- def plot_parshap(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- columns: SLICE | None = None,
- target: INT | str | tuple = 1,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "upper left",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the partial correlation of shap values.
-
- Plots the train and test correlation between the shap value of
- every feature with its target value, after removing the effect
- of all other features (partial correlation). This plot is
- useful to identify the features that are contributing most to
- overfitting. Features that lie below the bisector (diagonal
- line) performed worse on the test set than on the training set.
- If the estimator has a `scores_`, `feature_importances_` or
- `coef_` attribute, its normalized values are shown in a color
- map.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- columns: int, str, slice, sequence or None, default=None
- Features to plot. If None, it plots all features.
-
- target: int, str or tuple, default=1
- Class in the target column to target. For multioutput tasks,
- the value should be a tuple of the form (column, class).
- Note that for binary and multilabel tasks, the selected
- class is always the positive one.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="upper left"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_feature_importance
- atom.plots:PredictionPlot.plot_partial_dependence
- atom.plots:PredictionPlot.plot_permutation_importance
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["GNB", "RF"])
- atom.rf.plot_parshap(legend=None)
- atom.plot_parshap(columns=slice(5, 10))
- ```
-
- """
- target = self.branch._get_target(target)
-
- fig = self._get_figure()
-
- # Colorbar is only needed when a model has feature_importance
- if all(m.feature_importance is None for m in models):
- xaxis, yaxis = BasePlot._fig.get_axes()
- else:
- xaxis, yaxis = BasePlot._fig.get_axes(
- x=(0, 0.87),
- coloraxis=dict(
- colorscale="Reds",
- title="Normalized feature importance",
- font_size=self.label_fontsize,
- )
- )
-
- for m in models:
- parshap = {}
- fxs = m.branch._get_columns(columns, include_target=False)
-
- for ds in ("train", "test"):
- # Calculating shap values is computationally expensive,
- # therefore select a random subsample for large data sets
- if len(data := getattr(m, ds)) > 500:
- data = data.sample(500, random_state=self.random_state)
-
- # Replace data with the calculated shap values
- explanation = m._shap.get_explanation(data[m.features], target)
- data[m.features] = explanation.values
-
- parshap[ds] = pd.Series(index=fxs, dtype=float)
- for fx in fxs:
- # All other features are covariates
- covariates = [f for f in data.columns[:-1] if f != fx]
- cols = [fx, data.columns[-1], *covariates]
-
- # Compute covariance
- V = data[cols].cov()
-
- # Inverse covariance matrix
- Vi = np.linalg.pinv(V, hermitian=True)
- diag = Vi.diagonal()
-
- D = np.diag(np.sqrt(1 / diag))
-
- # Partial correlation matrix
- partial_corr = -1 * (D @ Vi @ D) # @ is matrix multiplication
-
- # Semi-partial correlation matrix
- with np.errstate(divide="ignore"):
- V_sqrt = np.sqrt(np.diag(V))[..., None]
- Vi_sqrt = np.sqrt(np.abs(diag - Vi ** 2 / diag[..., None])).T
- semi_partial_correlation = partial_corr / V_sqrt / Vi_sqrt
-
- # X covariates are removed
- parshap[ds][fx] = semi_partial_correlation[1, 0]
-
- # Get the feature importance or coefficients
- if m.feature_importance is not None:
- color = m.feature_importance.loc[fxs]
- else:
- color = BasePlot._fig.get_elem("parshap")
-
- fig.add_trace(
- go.Scatter(
- x=parshap["train"],
- y=parshap["test"],
- mode="markers+text",
- marker=dict(
- color=color,
- size=self.marker_size,
- coloraxis=f"coloraxis{xaxis[1:]}",
- line=dict(width=1, color="rgba(255, 255, 255, 0.9)"),
- ),
- text=m.features,
- textposition="top center",
- customdata=(data := None if isinstance(color, str) else list(color)),
- hovertemplate=(
-                        f"%{{text}}<br>(%{{x}}, %{{y}})"
-                        f"{'<br>Feature importance: %{customdata:.4f}' if data else ''}"
-                        f"<extra>{m.name}</extra>"
- ),
- name=m.name,
- legendgroup=m.name,
- showlegend=BasePlot._fig.showlegend(m.name, legend),
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- self._draw_straight_line(y="diagonal", xaxis=xaxis, yaxis=yaxis)
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Training set",
- ylabel="Test set",
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_parshap",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model)
- def plot_partial_dependence(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- columns: SLICE | None = None,
- kind: str | SEQUENCE = "average",
- pair: int | str | None = None,
- target: INT | str = 1,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "lower right",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the partial dependence of features.
-
- The partial dependence of a feature (or a set of features)
- corresponds to the response of the model for each possible
- value of the feature. The plot can take two forms:
-
- - If `pair` is None: Single feature partial dependence lines.
- The deciles of the feature values are shown with tick marks
- on the bottom.
- - If `pair` is defined: Two-way partial dependence plots are
- plotted as contour plots (only allowed for a single model).
-
- Read more about partial dependence on sklearn's
- [documentation][partial_dependence]. This plot is not available
- for multilabel nor multiclass-multioutput classification tasks.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- columns: int, str, slice, sequence or None, default=None
- Features to get the partial dependence from. If None, it
- uses the first 3 features in the dataset.
-
- kind: str or sequence, default="average"
- Kind of depedence to plot. Use a sequence or add `+` between
- options to select more than one. Choose from:
-
- - "average": Partial dependence averaged across all samples
- in the dataset.
- - "individual": Partial dependence for up to 50 random
- samples (Individual Conditional Expectation).
-
- This parameter is ignored when plotting feature pairs.
-
- pair: int, str or None, default=None
- Feature with which to pair the features selected by
- `columns`. If specified, the resulting figure displays
- contour plots. Only allowed when plotting a single model.
- If None, the plots show the partial dependece of single
- features.
-
- target: int or str, default=1
- Class in the target column to look at (only for multiclass
- classification tasks).
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="lower right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_feature_importance
- atom.plots:PredictionPlot.plot_parshap
- atom.plots:PredictionPlot.plot_permutation_importance
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["LR", "RF"])
- atom.plot_partial_dependence(kind="average+individual", legend="upper left")
- atom.rf.plot_partial_dependence(columns=(3, 4), pair=2)
- ```
-
- """
- if any(self.task.startswith(t) for t in ("multilabel", "multiclass-multioutput")):
- raise PermissionError(
- "The plot_partial_dependence method is not available for multilabel "
- f"nor multiclass-multioutput classification tasks, got {self.task}."
- )
- elif self.task.startswith("multiclass"):
- _, target = self.branch._get_target(target)
- else:
- target = 0
-
- kind = "+".join(lst(kind)).lower()
- if any(k not in ("average", "individual") for k in kind.split("+")):
- raise ValueError(
- f"Invalid value for the kind parameter, got {kind}. "
- "Choose from: average, individual."
- )
-
- axes, names = [], []
- fig = self._get_figure()
- for m in models:
- color = BasePlot._fig.get_elem(m.name)
-
- # Since every model can have different fxs, select them
- # every time and make sure the models use the same fxs
- cols = m.branch._get_columns(
- columns=(0, 1, 2) if columns is None else columns,
- include_target=False,
- )
-
- if not names:
- names = cols
- elif names != cols:
- raise ValueError(
- "Invalid value for the columns parameter. Not all "
- f"models use the same features, got {names} and {cols}."
- )
-
- if pair is not None:
- if len(models) > 1:
- raise ValueError(
- f"Invalid value for the pair parameter, got {pair}. "
- "The value must be None when plotting multiple models"
- )
- else:
- pair = m.branch._get_columns(pair, include_target=False)
- cols = [(c, pair[0]) for c in cols]
- else:
- cols = [(c,) for c in cols]
-
- # Create new axes
- if not axes:
- for i, col in enumerate(cols):
- # Calculate the distance between subplots
- offset = divide(0.025, len(cols) - 1)
-
- # Calculate the size of the subplot
- size = (1 - ((offset * 2) * (len(cols) - 1))) / len(cols)
-
- # Determine the position for the axes
- x_pos = i % len(cols) * (size + 2 * offset)
-
- xaxis, yaxis = BasePlot._fig.get_axes(x=(x_pos, rnd(x_pos + size)))
- axes.append((xaxis, yaxis))
-
- # Compute averaged predictions
- predictions = Parallel(n_jobs=self.n_jobs, backend=self.backend)(
- delayed(partial_dependence)(
- estimator=m.estimator,
- X=m.X_test,
- features=col,
- kind="both" if "individual" in kind else "average",
- ) for col in cols
- )
-
- # Compute deciles for ticks (only if line plots)
- if len(cols[0]) == 1:
- deciles = {}
- for fx in chain.from_iterable(cols):
- if fx not in deciles: # Skip if the feature is repeated
- X_col = _safe_indexing(m.X_test, fx, axis=1)
- deciles[fx] = mquantiles(X_col, prob=np.arange(0.1, 1.0, 0.1))
-
- for i, (ax, fx, pred) in enumerate(zip(axes, cols, predictions)):
- # Draw line or contour plot
- if len(pred["values"]) == 1:
- # For both average and individual: draw ticks on the horizontal axis
- for line in deciles[fx[0]]:
- fig.add_shape(
- type="line",
- x0=line,
- x1=line,
- xref=ax[0],
- y0=0,
- y1=0.05,
- yref=f"{axes[0][1]} domain",
- line=dict(width=1, color=BasePlot._fig.get_elem(m.name)),
- opacity=0.6,
- layer="below",
- )
-
- # Draw the mean of the individual lines
- if "average" in kind:
- fig.add_trace(
- go.Scatter(
- x=pred["values"][0],
- y=pred["average"][target].ravel(),
- mode="lines",
- line=dict(width=2, color=color),
- name=m.name,
- legendgroup=m.name,
- showlegend=BasePlot._fig.showlegend(m.name, legend),
- xaxis=ax[0],
- yaxis=axes[0][1],
- )
- )
-
- # Draw all individual (per sample) lines (ICE)
- if "individual" in kind:
- # Select up to 50 random samples to plot
- idx = np.random.choice(
- list(range(len(pred["individual"][target]))),
- size=min(len(pred["individual"][target]), 50),
- replace=False,
- )
- for sample in pred["individual"][target, idx, :]:
- fig.add_trace(
- go.Scatter(
- x=pred["values"][0],
- y=sample,
- mode="lines",
- line=dict(width=0.5, color=color),
- name=m.name,
- legendgroup=m.name,
- showlegend=BasePlot._fig.showlegend(m.name, legend),
- xaxis=ax[0],
- yaxis=axes[0][1],
- )
- )
-
- else:
- colorscale = PALETTE.get(BasePlot._fig.get_elem(m.name), "Teal")
- fig.add_trace(
- go.Contour(
- x=pred["values"][0],
- y=pred["values"][1],
- z=pred["average"][target],
- contours=dict(
- showlabels=True,
- labelfont=dict(size=self.tick_fontsize, color="white")
- ),
-                            hovertemplate="x:%{x}<br>y:%{y}<br>z:%{z}<extra></extra>",
- hoverongaps=False,
- colorscale=colorscale,
- showscale=False,
- showlegend=False,
- xaxis=ax[0],
- yaxis=axes[0][1],
- )
- )
-
- self._plot(
- ax=(f"xaxis{ax[0][1:]}", f"yaxis{ax[1][1:]}"),
- xlabel=fx[0],
- ylabel=(fx[1] if len(fx) > 1 else "Score") if i == 0 else None,
- )
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- groupclick="togglegroup",
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_partial_dependence",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model)
- def plot_permutation_importance(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- show: INT | None = None,
- n_repeats: INT = 10,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "lower right",
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the feature permutation importance of models.
-
- !!! warning
- This method can be slow. Results are cached to fasten
- repeated calls.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- show: int or None, default=None
- Number of features (ordered by importance) to show. If
- None, it shows all features.
-
- n_repeats: int, default=10
- Number of times to permute each feature.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="lower right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of features shown.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_feature_importance
- atom.plots:PredictionPlot.plot_partial_dependence
- atom.plots:PredictionPlot.plot_parshap
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["LR", "RF"])
- atom.plot_permutation_importance(show=10, n_repeats=7)
- ```
-
- """
- show = self._get_show(show, models)
-
- if n_repeats <= 0:
- raise ValueError(
- "Invalid value for the n_repeats parameter."
- f"Value should be >0, got {n_repeats}."
- )
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
-
- for m in models:
- # Permutation importances returns Bunch object
- permutations = self._memory.cache(permutation_importance)(
- estimator=m.estimator,
- X=m.X_test,
- y=m.y_test,
- scoring=self._metric[0],
- n_repeats=n_repeats,
- n_jobs=self.n_jobs,
- random_state=self.random_state,
- )
-
- fig.add_trace(
- go.Box(
- x=permutations["importances"].ravel(),
- y=list(np.array([[fx] * n_repeats for fx in m.features]).ravel()),
- marker_color=BasePlot._fig.get_elem(m.name),
- boxpoints="outliers",
- orientation="h",
- name=m.name,
- legendgroup=m.name,
- showlegend=BasePlot._fig.showlegend(m.name, legend),
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- fig.update_layout(
- {
- f"yaxis{yaxis[1:]}": dict(categoryorder="total ascending"),
- "boxmode": "group",
- }
- )
-
- # Unique number of features over all branches
- n_fxs = len(set([fx for m in models for fx in m.features]))
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Score",
- ylim=(n_fxs - show - 0.5, n_fxs - 0.5),
- title=title,
- legend=legend,
- figsize=figsize or (900, 400 + show * 50),
- plotname="plot_permutation_importance",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model(check_fitted=False))
- def plot_pipeline(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- draw_hyperparameter_tuning: bool = True,
- color_branches: bool | None = None,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> plt.Figure | None:
- """Plot a diagram of the pipeline.
-
- !!! warning
- This plot uses the [schemdraw][] package, which is
- incompatible with [plotly][]. The returned plot is
- therefore a [matplotlib figure][pltfigure].
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models for which to draw the pipeline. If None, all
- pipelines are plotted.
-
- draw_hyperparameter_tuning: bool, default=True
- Whether to draw if the models used Hyperparameter Tuning.
-
- color_branches: bool or None, default=None
- Whether to draw every branch in a different color. If None,
- branches are colored when there is more than one.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the pipeline drawn.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as png. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [plt.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:DataPlot.plot_wordcloud
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["GNB", "RNN", "SGD", "MLP"])
- atom.voting(models=atom.winners[:2])
- atom.plot_pipeline()
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.scale()
- atom.prune()
- atom.run("RF", n_trials=30)
-
- atom.branch = "undersample"
- atom.balance("nearmiss")
- atom.run("RF_undersample")
-
- atom.branch = "oversample_from_master"
- atom.balance("smote")
- atom.run("RF_oversample")
-
- atom.plot_pipeline()
- ```
-
- """
-
- def get_length(pl, i):
- """Get the maximum length of the name of a block."""
- if len(pl) > i:
- return max(len(pl[i].__class__.__name__) * 0.5, 7)
- else:
- return 0
-
- def check_y(xy):
- """Return y unless there is something right, then jump."""
- while any(pos[0] > xy[0] and pos[1] == xy[1] for pos in positions.values()):
- xy = Point((xy[0], xy[1] + height))
-
- return xy[1]
-
- def add_wire(x, y):
- """Draw a connecting wire between two estimators."""
- d.add(
- Wire(shape="z", k=(x - d.here[0]) / (length + 1), arrow="->")
- .to((x, y))
- .color(branch["color"])
- )
-
- # Update arrowhead manually
- d.elements[-1].segments[-1].arrowwidth = 0.3
- d.elements[-1].segments[-1].arrowlength = 0.5
-
- check_dependency("schemdraw")
- from schemdraw import Drawing
- from schemdraw.flow import Data, RoundBox, Subroutine, Wire
- from schemdraw.util import Point
-
- fig = self._get_figure(backend="matplotlib")
- check_canvas(BasePlot._fig.is_canvas, "plot_pipeline")
-
- # Define branches to plot (if called from model, it's only one)
- branches = []
- for branch in getattr(self, "_branches", [self.branch]):
- draw_models, draw_ensembles = [], []
- for m in models:
- if m.branch is branch:
- if m.acronym not in ("Stack", "Vote"):
- draw_models.append(m)
- else:
- draw_ensembles.append(m)
-
- # Additionally, add all dependent models (if not already there)
- draw_models.extend([i for i in m._models if i not in draw_models])
-
- if not models or draw_models:
- branches.append(
- {
- "name": branch.name,
- "pipeline": list(branch.pipeline),
- "models": draw_models,
- "ensembles": draw_ensembles,
- }
- )
-
- # Define colors per branch
- for branch in branches:
- if color_branches or (color_branches is None and len(branches) > 1):
- color = next(BasePlot._fig.palette)
-
- # Convert back to format accepted by matplotlib
- branch["color"] = unconvert_from_RGB_255(unlabel_rgb(color))
- else:
- branch["color"] = "black"
-
- # Create schematic drawing
- d = Drawing(unit=1, backend="matplotlib")
- d.config(fontsize=self.tick_fontsize)
- d.add(Subroutine(w=8, s=0.7).label("Raw data"))
-
- height = 3 # Height of every block
- length = 5 # Minimum arrow length
-
- # Define the x-position for every block
- x_pos = [d.here[0] + length]
- for i in range(max(len(b["pipeline"]) for b in branches)):
- len_block = reduce(max, [get_length(b["pipeline"], i) for b in branches])
- x_pos.append(x_pos[-1] + length + len_block)
-
- # Add positions for scaling, hyperparameter tuning and models
- x_pos.extend([x_pos[-1], x_pos[-1]])
- if any(m.scaler for m in models):
- x_pos[-1] = x_pos[-2] = x_pos[-3] + length + 7
- if draw_hyperparameter_tuning and any(m.trials is not None for m in models):
- x_pos[-1] = x_pos[-2] + length + 11
-
- positions = {0: d.here} # Contains the position of every element
- for branch in branches:
- d.here = positions[0]
-
- for i, est in enumerate(branch["pipeline"]):
- # If the estimator has already been seen, don't draw
- if id(est) in positions:
- # Change location to estimator's end
- d.here = positions[id(est)]
- continue
-
- # Draw transformer
- add_wire(x_pos[i], check_y(d.here))
- d.add(
- RoundBox(w=max(len(est.__class__.__name__) * 0.5, 7))
- .label(est.__class__.__name__, color="k")
- .color(branch["color"])
- .anchor("W")
- .drop("E")
- )
-
- positions[id(est)] = d.here
-
- for model in branch["models"]:
- # Position at last transformer or at start
- if branch["pipeline"]:
- d.here = positions[id(est)]
- else:
- d.here = positions[0]
-
- # For a single branch, center models
- if len(branches) == 1:
- offset = height * (len(branch["models"]) - 1) / 2
- else:
- offset = 0
-
- # Draw automated feature scaling
- if model.scaler:
- add_wire(x_pos[-3], check_y((d.here[0], d.here[1] - offset)))
- d.add(
- RoundBox(w=7)
- .label("Scaler", color="k")
- .color(branch["color"])
- .drop("E")
- )
- offset = 0
-
- # Draw hyperparameter tuning
- if draw_hyperparameter_tuning and model.trials is not None:
- add_wire(x_pos[-2], check_y((d.here[0], d.here[1] - offset)))
- d.add(
- Data(w=11)
- .label("Hyperparameter\nTuning", color="k")
- .color(branch["color"])
- .drop("E")
- )
- offset = 0
-
- # Remove classifier/regressor from model's name
- name = model.estimator.__class__.__name__
- if name.lower().endswith("classifier"):
- name = name[:-10]
- elif name.lower().endswith("regressor"):
- name = name[:-9]
-
- # Draw model
- add_wire(x_pos[-1], check_y((d.here[0], d.here[1] - offset)))
- d.add(
- Data(w=max(len(name) * 0.5, 7))
- .label(name, color="k")
- .color(branch["color"])
- .anchor("W")
- .drop("E")
- )
-
- positions[id(model)] = d.here
-
- # Draw ensembles
- max_pos = max(pos[0] for pos in positions.values()) # Max length model names
- for branch in branches:
- for model in branch["ensembles"]:
- # Determine y-position of the ensemble
- y_pos = [positions[id(m)][1] for m in model._models]
- offset = height / 2 * (len(branch["ensembles"]) - 1)
- y = min(y_pos) + (max(y_pos) - min(y_pos)) * 0.5 - offset
- y = check_y((max_pos + length, max(min(y_pos), y)))
-
- d.here = (max_pos + length, y)
-
- d.add(
- Data(w=max(len(model._fullname) * 0.5, 7))
- .label(model._fullname, color="k")
- .color(branch["color"])
- .anchor("W")
- .drop("E")
- )
-
- positions[id(model)] = d.here
-
- # Draw a wire from every model to the ensemble
- for m in model._models:
- d.here = positions[id(m)]
- add_wire(max_pos + length, y)
-
- if not figsize:
- dpi, bbox = fig.get_dpi(), d.get_bbox()
- figsize = (dpi * bbox.xmax // 4, (dpi / 2) * (bbox.ymax - bbox.ymin))
-
- d.draw(canvas=plt.gca(), showframe=False, show=False)
- plt.axis("off")
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=plt.gca(),
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_pipeline",
- filename=filename,
- display=display,
- )
-
- @available_if(has_task(["binary", "multilabel"]))
- @composed(crash, plot_from_model)
- def plot_prc(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- dataset: str | SEQUENCE = "test",
- target: INT | str = 0,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "lower left",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the precision-recall curve.
-
- Read more about [PRC][] in sklearn's documentation. Only
- available for binary classification tasks.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- dataset: str or sequence, default="test"
- Data set on which to calculate the metric. Use a sequence
- or add `+` between options to select more than one. Choose
- from: "train", "test" or "holdout".
-
- target: int or str, default=0
- Target column to look at. Only for [multilabel][] tasks.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="lower left"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_det
- atom.plots:PredictionPlot.plot_lift
- atom.plots:PredictionPlot.plot_roc
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import make_classification
-
- X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["LR", "RF"])
- atom.plot_prc()
- ```
-
- """
- dataset = self._get_set(dataset, max_one=False)
- target = self.branch._get_target(target, only_columns=True)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
- for m in models:
- for ds in dataset:
- y_true, y_pred = m._get_pred(ds, target, attr="thresh")
-
- # Get precision-recall pairs for different thresholds
- prec, rec, _ = precision_recall_curve(y_true, y_pred)
-
- fig.add_trace(
- self._draw_line(
- x=rec,
- y=prec,
- mode="lines",
- parent=m.name,
- child=ds,
- legend=legend,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- self._draw_straight_line(sum(m.y_test) / len(m.y_test), xaxis=xaxis, yaxis=yaxis)
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Recall",
- ylabel="Precision",
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_prc",
- filename=filename,
- display=display,
- )
-
    @available_if(has_task("class"))
    @composed(crash, plot_from_model)
    def plot_probabilities(
        self,
        models: INT | str | MODEL | slice | SEQUENCE | None = None,
        dataset: str = "test",
        target: INT | str | tuple = 1,
        *,
        title: str | dict | None = None,
        legend: str | dict | None = "upper right",
        figsize: tuple[INT, INT] = (900, 600),
        filename: str | None = None,
        display: bool | None = True,
    ) -> go.Figure | None:
        """Plot the probability distribution of the target classes.

        This plot is available only for models with a `predict_proba`
        method in classification tasks.

        Parameters
        ----------
        models: int, str, Model, slice, sequence or None, default=None
            Models to plot. If None, all models are selected.

        dataset: str, default="test"
            Data set on which to calculate the metric. Choose from:
            "train", "test" or "holdout".

        target: int, str or tuple, default=1
            Probability of being that class in the target column. For
            multioutput tasks, the value should be a tuple of the form
            (column, class).

        title: str, dict or None, default=None
            Title for the plot.

            - If None, no title is shown.
            - If str, text for the title.
            - If dict, [title configuration][parameters].

        legend: str, dict or None, default="upper right"
            Legend for the plot. See the [user guide][parameters] for
            an extended description of the choices.

            - If None: No legend is shown.
            - If str: Location where to show the legend.
            - If dict: Legend configuration.

        figsize: tuple, default=(900, 600)
            Figure's size in pixels, format as (x, y).

        filename: str or None, default=None
            Save the plot using this name. Use "auto" for automatic
            naming. The type of the file depends on the provided name
            (.html, .png, .pdf, etc...). If `filename` has no file type,
            the plot is saved as html. If None, the plot is not saved.

        display: bool or None, default=True
            Whether to render the plot. If None, it returns the figure.

        Returns
        -------
        [go.Figure][] or None
            Plot object. Only returned if `display=None`.

        See Also
        --------
        atom.plots:PredictionPlot.plot_confusion_matrix
        atom.plots:PredictionPlot.plot_results
        atom.plots:PredictionPlot.plot_threshold

        Examples
        --------
        ```pycon
        from atom import ATOMClassifier
        from sklearn.datasets import make_classification

        X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)

        atom = ATOMClassifier(X, y, random_state=1)
        atom.run(["LR", "RF"])
        atom.plot_probabilities()
        ```

        """
        # Fail early if any selected model lacks predict_proba
        check_predict_proba(models, "plot_probabilities")
        ds = self._get_set(dataset, max_one=True)
        # Resolve target into (column index, class label), then map the
        # index to the actual column name
        col, cls = self.branch._get_target(target)
        col = lst(self.target)[col]

        fig = self._get_figure()
        xaxis, yaxis = BasePlot._fig.get_axes()
        for m in models:
            # Cached per-set attributes, e.g. m.y_test / m.predict_proba_test
            y_true, y_pred = getattr(m, f"y_{ds}"), getattr(m, f"predict_proba_{ds}")
            # One KDE trace per observed class value in the target column
            for value in np.unique(m.dataset[col]):
                # Get indices per class
                if is_multioutput(self.task):
                    if self.task.startswith("multilabel"):
                        # Multilabel: probabilities are indexed per column
                        hist = y_pred.loc[y_true[col] == value, col]
                    else:
                        # Multiclass-multioutput: first select (class, column)
                        hist = y_pred.loc[cls, col].loc[y_true[col] == value]
                else:
                    hist = y_pred.loc[y_true == value, str(cls)]

                fig.add_trace(
                    go.Scatter(
                        # Gaussian KDE evaluated on a fixed [0, 1] grid
                        x=(x := np.linspace(0, 1, 100)),
                        y=stats.gaussian_kde(hist)(x),
                        mode="lines",
                        line=dict(
                            width=2,
                            color=BasePlot._fig.get_elem(m.name),
                            dash=BasePlot._fig.get_elem(ds, "dash"),
                        ),
                        fill="tonexty",
                        # Translucent fill derived from the model's line color
                        fillcolor=f"rgba{BasePlot._fig.get_elem(m.name)[3:-1]}, 0.2)",
                        fillpattern=dict(shape=BasePlot._fig.get_elem(value, "shape")),
                        name=f"{col}={value}",
                        legendgroup=m.name,
                        legendgrouptitle=dict(text=m.name, font_size=self.label_fontsize),
                        showlegend=BasePlot._fig.showlegend(f"{m.name}-{value}", legend),
                        xaxis=xaxis,
                        yaxis=yaxis,
                    )
                )

        BasePlot._fig.used_models.extend(models)
        return self._plot(
            ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
            groupclick="toggleitem",
            xlabel="Probability",
            ylabel="Probability density",
            xlim=(0, 1),
            title=title,
            legend=legend,
            figsize=figsize,
            plotname="plot_probabilities",
            filename=filename,
            display=display,
        )
-
    @available_if(has_task("reg"))
    @composed(crash, plot_from_model)
    def plot_residuals(
        self,
        models: INT | str | MODEL | slice | SEQUENCE | None = None,
        dataset: str = "test",
        target: INT | str = 0,
        *,
        title: str | dict | None = None,
        legend: str | dict | None = "upper left",
        figsize: tuple[INT, INT] = (900, 600),
        filename: str | None = None,
        display: bool | None = True,
    ) -> go.Figure | None:
        """Plot a model's residuals.

        The plot shows the residuals (difference between the predicted
        and the true value) on the vertical axis and the independent
        variable on the horizontal axis. The gray, intersected line
        shows the identity line. This plot can be useful to analyze the
        variance of the error of the regressor. If the points are
        randomly dispersed around the horizontal axis, a linear
        regression model is appropriate for the data; otherwise, a
        non-linear model is more appropriate. This plot is only
        available for regression tasks.

        Parameters
        ----------
        models: int, str, Model, slice, sequence or None, default=None
            Models to plot. If None, all models are selected.

        dataset: str, default="test"
            Data set on which to calculate the metric. Choose from:
            "train", "test" or "holdout".

        target: int or str, default=0
            Target column to look at. Only for [multioutput tasks][].

        title: str, dict or None, default=None
            Title for the plot.

            - If None, no title is shown.
            - If str, text for the title.
            - If dict, [title configuration][parameters].

        legend: str, dict or None, default="upper left"
            Legend for the plot. See the [user guide][parameters] for
            an extended description of the choices.

            - If None: No legend is shown.
            - If str: Location where to show the legend.
            - If dict: Legend configuration.

        figsize: tuple, default=(900, 600)
            Figure's size in pixels, format as (x, y).

        filename: str or None, default=None
            Save the plot using this name. Use "auto" for automatic
            naming. The type of the file depends on the provided name
            (.html, .png, .pdf, etc...). If `filename` has no file type,
            the plot is saved as html. If None, the plot is not saved.

        display: bool or None, default=True
            Whether to render the plot. If None, it returns the figure.

        Returns
        -------
        [go.Figure][] or None
            Plot object. Only returned if `display=None`.

        See Also
        --------
        atom.plots:PredictionPlot.plot_errors

        Examples
        --------
        ```pycon
        from atom import ATOMRegressor
        from sklearn.datasets import load_diabetes

        X, y = load_diabetes(return_X_y=True, as_frame=True)

        atom = ATOMRegressor(X, y)
        atom.run(["OLS", "LGB"])
        atom.plot_residuals()
        ```

        """
        ds = self._get_set(dataset, max_one=True)
        target = self.branch._get_target(target, only_columns=True)

        fig = self._get_figure()
        # Two horizontally stacked axes: scatter (left, 69% of width)
        # and the residuals histogram (right)
        xaxis, yaxis = BasePlot._fig.get_axes(x=(0, 0.69))
        xaxis2, yaxis2 = BasePlot._fig.get_axes(x=(0.71, 1.0))
        for m in models:
            y_true, y_pred = m._get_pred(ds, target)

            fig.add_trace(
                go.Scatter(
                    x=y_true,
                    # Residual = true - predicted; reused by the histogram
                    y=(res := np.subtract(y_true, y_pred)),
                    mode="markers",
                    line=dict(width=2, color=BasePlot._fig.get_elem(m.name)),
                    name=m.name,
                    legendgroup=m.name,
                    showlegend=BasePlot._fig.showlegend(m.name, legend),
                    xaxis=xaxis,
                    yaxis=yaxis,
                )
            )

            fig.add_trace(
                go.Histogram(
                    y=res,
                    # Shared bingroup aligns bins across models
                    bingroup="residuals",
                    marker=dict(
                        color=f"rgba({BasePlot._fig.get_elem(m.name)[4:-1]}, 0.2)",
                        line=dict(width=2, color=BasePlot._fig.get_elem(m.name)),
                    ),
                    name=m.name,
                    legendgroup=m.name,
                    showlegend=False,  # legend entry comes from the scatter
                    xaxis=xaxis2,
                    yaxis=yaxis,  # shares the y-range with the scatter
                )
            )

        # Zero-residual reference line
        self._draw_straight_line(y=0, xaxis=xaxis, yaxis=yaxis)

        # NOTE(review): the yaxis key is built from the *xaxis* suffix;
        # this only targets the intended axis if get_axes returns pairs
        # with matching numeric suffixes — confirm.
        fig.update_layout({f"yaxis{xaxis[1:]}_showgrid": True, "barmode": "overlay"})

        # Style the secondary (histogram) axes without returning yet
        self._plot(
            ax=(f"xaxis{xaxis2[1:]}", f"yaxis{yaxis2[1:]}"),
            xlabel="Distribution",
            title=title,
        )

        BasePlot._fig.used_models.extend(models)
        return self._plot(
            ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
            groupclick="togglegroup",
            ylabel="Residuals",
            xlabel="True value",
            title=title,
            legend=legend,
            figsize=figsize,
            plotname="plot_residuals",
            filename=filename,
            display=display,
        )
-
    @composed(crash, plot_from_model)
    def plot_results(
        self,
        models: INT | str | MODEL | slice | SEQUENCE | None = None,
        metric: INT | str | SEQUENCE | None = None,
        *,
        title: str | dict | None = None,
        legend: str | dict | None = "lower right",
        figsize: tuple[INT, INT] | None = None,
        filename: str | None = None,
        display: bool | None = True,
    ) -> go.Figure | None:
        """Plot the model results.

        If all models applied bootstrap, the plot is a boxplot. If
        not, the plot is a barplot. Models are ordered based on
        their score from the top down. The score is either the
        `score_bootstrap` or `score_test` attribute of the model,
        selected in that order.

        Parameters
        ----------
        models: int, str, Model, slice, sequence or None, default=None
            Models to plot. If None, all models are selected.

        metric: int, str, sequence or None, default=None
            Metric to plot (only for multi-metric runs). Other available
            options are "time_bo", "time_fit", "time_bootstrap" and
            "time". If str, add `+` between options to select more than
            one. If None, the metric used to run the pipeline is selected.

        title: str, dict or None, default=None
            Title for the plot.

            - If None, no title is shown.
            - If str, text for the title.
            - If dict, [title configuration][parameters].

        legend: str, dict or None, default="lower right"
            Legend for the plot. See the [user guide][parameters] for
            an extended description of the choices.

            - If None: No legend is shown.
            - If str: Location where to show the legend.
            - If dict: Legend configuration.

        figsize: tuple or None, default=None
            Figure's size in pixels, format as (x, y). If None, it
            adapts the size to the number of models.

        filename: str or None, default=None
            Save the plot using this name. Use "auto" for automatic
            naming. The type of the file depends on the provided name
            (.html, .png, .pdf, etc...). If `filename` has no file type,
            the plot is saved as html. If None, the plot is not saved.

        display: bool or None, default=True
            Whether to render the plot. If None, it returns the figure.

        Returns
        -------
        [go.Figure][] or None
            Plot object. Only returned if `display=None`.

        See Also
        --------
        atom.plots:PredictionPlot.plot_confusion_matrix
        atom.plots:PredictionPlot.plot_probabilities
        atom.plots:PredictionPlot.plot_threshold

        Examples
        --------
        ```pycon
        from atom import ATOMClassifier
        from sklearn.datasets import make_classification

        X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)

        atom = ATOMClassifier(X, y, random_state=1)
        atom.run(["GNB", "LR", "RF", "LGB"], metric=["f1", "recall"])
        atom.plot_results()

        atom.run(["GNB", "LR", "RF", "LGB"], metric=["f1", "recall"], n_bootstrap=5)
        atom.plot_results()
        atom.plot_results(metric="time_fit+time")
        ```

        """

        def get_std(model: MODEL, metric: int) -> SCALAR:
            """Get the standard deviation of the bootstrap scores.

            Parameters
            ----------
            model: Model
                Model to get the std from.

            metric: int
                Index of the metric to get it from.

            Returns
            -------
            int or float
                Standard deviation score or 0 if not bootstrapped.

            """
            if model.bootstrap is None:
                return 0
            else:
                return model.bootstrap.iloc[:, metric].std()

        metric = self._get_metric(metric, max_one=False)

        fig = self._get_figure()
        xaxis, yaxis = BasePlot._fig.get_axes()

        for met in metric:
            # A str metric is a time attribute (e.g. "time_fit"); an int
            # is an index into the pipeline's scoring metrics
            if isinstance(met, str):
                color = BasePlot._fig.get_elem(met)
                fig.add_trace(
                    go.Bar(
                        x=[getattr(m, met) for m in models],
                        y=[m.name for m in models],
                        orientation="h",
                        marker=dict(
                            # Translucent fill from the metric's color
                            color=f"rgba({color[4:-1]}, 0.2)",
                            line=dict(width=2, color=color),
                        ),
                        hovertemplate=f"%{{x}}{met}",
                        name=met,
                        legendgroup=met,
                        showlegend=BasePlot._fig.showlegend(met, legend),
                        xaxis=xaxis,
                        yaxis=yaxis,
                    )
                )
            else:
                name = self._metric[met].name
                color = BasePlot._fig.get_elem()

                if all(m.score_bootstrap for m in models):
                    # Every model was bootstrapped -> boxplot of all scores
                    x = np.array([m.bootstrap.iloc[:, met] for m in models]).ravel()
                    y = np.array([[m.name] * len(m.bootstrap) for m in models]).ravel()
                    fig.add_trace(
                        go.Box(
                            x=x,
                            y=list(y),
                            marker_color=color,
                            boxpoints="outliers",
                            orientation="h",
                            name=name,
                            legendgroup=name,
                            showlegend=BasePlot._fig.showlegend(name, legend),
                            xaxis=xaxis,
                            yaxis=yaxis,
                        )
                    )
                else:
                    # Barplot of best scores with std error bars (0 when
                    # the model wasn't bootstrapped)
                    fig.add_trace(
                        go.Bar(
                            x=[get_best_score(m, met) for m in models],
                            y=[m.name for m in models],
                            error_x=dict(
                                type="data",
                                array=[get_std(m, met) for m in models],
                            ),
                            orientation="h",
                            marker=dict(
                                color=f"rgba({color[4:-1]}, 0.2)",
                                line=dict(width=2, color=color),
                            ),
                            hovertemplate="%{x}",
                            name=name,
                            legendgroup=name,
                            showlegend=BasePlot._fig.showlegend(name, legend),
                            xaxis=xaxis,
                            yaxis=yaxis,
                        )
                    )

        # Order models by total score, top down
        fig.update_layout(
            {
                f"yaxis{yaxis[1:]}": dict(categoryorder="total ascending"),
                "bargroupgap": 0.05,
                "boxmode": "group",
            }
        )

        BasePlot._fig.used_models.extend(models)
        return self._plot(
            ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
            # All-str metrics means only time attributes were requested
            xlabel="time (s)" if all(isinstance(m, str) for m in metric) else "Score",
            title=title,
            legend=legend,
            figsize=figsize or (900, 400 + len(models) * 50),
            plotname="plot_results",
            filename=filename,
            display=display,
        )
-
- @available_if(has_task(["binary", "multilabel"]))
- @composed(crash, plot_from_model)
- def plot_roc(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- dataset: str | SEQUENCE = "test",
- target: INT | str = 0,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "lower right",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot the Receiver Operating Characteristics curve.
-
- Read more about [ROC][] in sklearn's documentation. Only
- available for classification tasks.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- dataset: str or sequence, default="test"
- Data set on which to calculate the metric. Use a sequence
- or add `+` between options to select more than one. Choose
- from: "train", "test" or "holdout".
-
- target: int or str, default=0
- Target column to look at. Only for [multilabel][] tasks.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="lower right"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_gains
- atom.plots:PredictionPlot.plot_lift
- atom.plots:PredictionPlot.plot_prc
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import make_classification
-
- X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["LR", "RF"])
- atom.plot_roc()
- ```
-
- """
- dataset = self._get_set(dataset, max_one=False)
- target = self.branch._get_target(target, only_columns=True)
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
- for m in models:
- for ds in dataset:
- # Get False (True) Positive Rate as arrays
- fpr, tpr, _ = roc_curve(*m._get_pred(ds, target, attr="thresh"))
-
- fig.add_trace(
- self._draw_line(
- x=fpr,
- y=tpr,
- mode="lines",
- parent=m.name,
- child=ds,
- legend=legend,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- self._draw_straight_line(y="diagonal", xaxis=xaxis, yaxis=yaxis)
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlim=(-0.03, 1.03),
- ylim=(-0.03, 1.03),
- xlabel="FPR",
- ylabel="TPR",
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_roc",
- filename=filename,
- display=display,
- )
-
    @composed(crash, plot_from_model(ensembles=False))
    def plot_successive_halving(
        self,
        models: INT | str | MODEL | slice | SEQUENCE | None = None,
        metric: INT | str | SEQUENCE | None = None,
        *,
        title: str | dict | None = None,
        legend: str | dict | None = "lower right",
        figsize: tuple[INT, INT] = (900, 600),
        filename: str | None = None,
        display: bool | None = True,
    ) -> go.Figure | None:
        """Plot scores per iteration of the successive halving.

        Only use with models fitted using [successive halving][].
        [Ensembles][] are ignored.

        Parameters
        ----------
        models: int, str, Model, slice, sequence or None, default=None
            Models to plot. If None, all models are selected.

        metric: int, str, sequence or None, default=None
            Metric to plot (only for multi-metric runs). Use a sequence
            or add `+` between options to select more than one. If None,
            the metric used to run the pipeline is selected.

        title: str, dict or None, default=None
            Title for the plot.

            - If None, no title is shown.
            - If str, text for the title.
            - If dict, [title configuration][parameters].

        legend: str, dict or None, default="lower right"
            Legend for the plot. See the [user guide][parameters] for
            an extended description of the choices.

            - If None: No legend is shown.
            - If str: Location where to show the legend.
            - If dict: Legend configuration.

        figsize: tuple, default=(900, 600)
            Figure's size in pixels, format as (x, y).

        filename: str or None, default=None
            Save the plot using this name. Use "auto" for automatic
            naming. The type of the file depends on the provided name
            (.html, .png, .pdf, etc...). If `filename` has no file type,
            the plot is saved as html. If None, the plot is not saved.

        display: bool or None, default=True
            Whether to render the plot. If None, it returns the figure.

        Returns
        -------
        [go.Figure][] or None
            Plot object. Only returned if `display=None`.

        See Also
        --------
        atom.plots:PredictionPlot.plot_learning_curve
        atom.plots:PredictionPlot.plot_results

        Examples
        --------
        ```pycon
        from atom import ATOMClassifier
        from sklearn.datasets import load_breast_cancer

        X, y = load_breast_cancer(return_X_y=True, as_frame=True)

        atom = ATOMClassifier(X, y, random_state=1)
        atom.successive_halving(["Tree", "Bag", "RF", "LGB"], n_bootstrap=5)
        atom.plot_successive_halving()
        ```

        """
        metric = self._get_metric(metric, max_one=False)

        fig = self._get_figure()
        xaxis, yaxis = BasePlot._fig.get_axes()

        for met in metric:
            # Per model group: x = halving iteration, y = best score,
            # std = bootstrap spread (only when available)
            x, y, std = defaultdict(list), defaultdict(list), defaultdict(list)
            for m in models:
                # Presumably the number of models in this iteration:
                # train-set size divided by this model's training-subset
                # size — TODO confirm against the successive halving runner
                x[m._group].append(len(m.branch._idx[1]) // m._train_idx)
                y[m._group].append(get_best_score(m, met))
                if m.bootstrap is not None:
                    std[m._group].append(m.bootstrap.iloc[:, met].std())

            for group in x:
                fig.add_trace(
                    self._draw_line(
                        x=x[group],
                        y=y[group],
                        mode="lines+markers",
                        marker_symbol="circle",
                        error_y=dict(type="data", array=std[group], visible=True),
                        parent=group,
                        child=self._metric[met].name,
                        legend=legend,
                        xaxis=xaxis,
                        yaxis=yaxis,
                    )
                )

                # Add error bands
                # NOTE(review): `m` is the last model of the loop above, so
                # this condition reflects only that model's bootstrap state
                # for every group — confirm this is intended
                if m.bootstrap is not None:
                    fillcolor = f"rgba{BasePlot._fig.get_elem(group)[3:-1]}, 0.2)"
                    fig.add_traces(
                        [
                            # Upper bound (mean + std)
                            go.Scatter(
                                x=x[group],
                                y=np.add(y[group], std[group]),
                                mode="lines",
                                line=dict(width=1, color=BasePlot._fig.get_elem(group)),
                                hovertemplate="%{y}upper bound",
                                legendgroup=group,
                                showlegend=False,
                                xaxis=xaxis,
                                yaxis=yaxis,
                            ),
                            # Lower bound (mean - std), filled to the upper
                            go.Scatter(
                                x=x[group],
                                y=np.subtract(y[group], std[group]),
                                mode="lines",
                                line=dict(width=1, color=BasePlot._fig.get_elem(group)),
                                fill="tonexty",
                                fillcolor=fillcolor,
                                hovertemplate="%{y}lower bound",
                                legendgroup=group,
                                showlegend=False,
                                xaxis=xaxis,
                                yaxis=yaxis,
                            ),
                        ]
                    )

        # Iterations shrink the model count, so reverse the x-axis
        fig.update_layout({f"xaxis{yaxis[1:]}": dict(dtick=1, autorange="reversed")})

        BasePlot._fig.used_models.extend(models)
        return self._plot(
            ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
            groupclick="togglegroup",
            title=title,
            legend=legend,
            xlabel="n_models",
            ylabel="Score",
            figsize=figsize,
            plotname="plot_successive_halving",
            filename=filename,
            display=display,
        )
-
- @available_if(has_task(["binary", "multilabel"]))
- @composed(crash, plot_from_model)
- def plot_threshold(
- self,
- models: INT | str | MODEL | slice | SEQUENCE | None = None,
- metric: METRIC_SELECTOR = None,
- dataset: str = "test",
- target: INT | str = 0,
- steps: INT = 100,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = "lower left",
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> go.Figure | None:
- """Plot metric performances against threshold values.
-
- This plot is available only for models with a `predict_proba`
- method in a binary or [multilabel][] classification task.
-
- Parameters
- ----------
- models: int, str, Model, slice, sequence or None, default=None
- Models to plot. If None, all models are selected.
-
- metric: str, func, scorer, sequence or None, default=None
- Metric to plot. Choose from any of sklearn's scorers, a
- function with signature `metric(y_true, y_pred)`, a scorer
- object or a sequence of these. Use a sequence or add `+`
- between options to select more than one. If None, the
- metric used to run the pipeline is selected.
-
- dataset: str, default="test"
- Data set on which to calculate the metric. Choose from:
- "train", "test" or "holdout".
-
- target: int or str, default=0
- Target column to look at. Only for [multilabel][] tasks.
-
- steps: int, default=100
- Number of thresholds measured.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default="lower left"
- Legend for the plot. See the [user guide][parameters] for
- an extended description of the choices.
-
- - If None: No legend is shown.
- - If str: Location where to show the legend.
- - If dict: Legend configuration.
-
- figsize: tuple, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as html. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [go.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_calibration
- atom.plots:PredictionPlot.plot_confusion_matrix
- atom.plots:PredictionPlot.plot_probabilities
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import make_classification
-
- X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run(["LR", "RF"])
- atom.plot_threshold()
- ```
-
- """
- check_predict_proba(models, "plot_threshold")
- ds = self._get_set(dataset, max_one=True)
- target = self.branch._get_target(target, only_columns=True)
-
- # Get all metric functions from the input
- if metric is None:
- metrics = [m._score_func for m in self._metric]
- else:
- metrics = []
- for m in lst(metric):
- if isinstance(m, str):
- metrics.extend(m.split("+"))
- else:
- metrics.append(m)
- metrics = [get_custom_scorer(m)._score_func for m in metrics]
-
- fig = self._get_figure()
- xaxis, yaxis = BasePlot._fig.get_axes()
-
- steps = np.linspace(0, 1, steps)
- for m in models:
- y_true, y_pred = m._get_pred(ds, target, attr="predict_proba")
- for met in metrics:
- fig.add_trace(
- self._draw_line(
- x=steps,
- y=[met(y_true, y_pred >= step) for step in steps],
- parent=m.name,
- child=met.__name__,
- legend=legend,
- xaxis=xaxis,
- yaxis=yaxis,
- )
- )
-
- BasePlot._fig.used_models.extend(models)
- return self._plot(
- ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
- xlabel="Threshold",
- ylabel="Score",
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_threshold",
- filename=filename,
- display=display,
- )
-
-
-@typechecked
-class ShapPlot(BasePlot):
- """Shap plots.
-
- ATOM wrapper for plots made by the shap package, using Shapley
- values for model interpretation. These plots are accessible from
- the runners or from the models. Only one model can be plotted at
- the same time since the plots are not made by ATOM.
-
- """
-
- @composed(crash, plot_from_model(max_one=True))
- def plot_shap_bar(
- self,
- models: INT | str | MODEL | None = None,
- index: SLICE | None = None,
- show: INT | None = None,
- target: INT | str | tuple = 1,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> plt.Figure | None:
- """Plot SHAP's bar plot.
-
- Create a bar plot of a set of SHAP values. If a single sample
- is passed, then the SHAP values are plotted. If many samples
- are passed, then the mean absolute value for each feature
- column is plotted. Read more about SHAP plots in the
- [user guide][shap].
-
- Parameters
- ----------
- models: int, str, Model or None, default=None
- Model to plot. If None, all models are selected. Note that
- leaving the default option could raise an exception if there
- are multiple models. To avoid this, call the plot directly
- from a model, e.g. `atom.lr.plot_shap_bar()`.
-
- index: int, str, slice, sequence or None, default=None
- Rows in the dataset to plot. If None, it selects all rows
- in the test set.
-
- show: int or None, default=None
- Number of features (ordered by importance) to show. If
- None, it shows all features.
-
- target: int, str or tuple, default=1
- Class in the target column to target. For multioutput tasks,
- the value should be a tuple of the form (column, class).
- Note that for binary and multilabel tasks, the selected
- class is always the positive one.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of features shown.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as png. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [plt.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_parshap
- atom.plots:ShapPlot.plot_shap_beeswarm
- atom.plots:ShapPlot.plot_shap_scatter
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run("LR")
- atom.plot_shap_bar(show=10)
- ```
-
- """
- rows = models.X.loc[models.branch._get_rows(index)]
- show = self._get_show(show, models)
- target = self.branch._get_target(target)
- explanation = models._shap.get_explanation(rows, target)
-
- self._get_figure(backend="matplotlib")
- check_canvas(BasePlot._fig.is_canvas, "plot_shap_bar")
-
- shap.plots.bar(explanation, max_display=show, show=False)
-
- BasePlot._fig.used_models.append(models)
- return self._plot(
- ax=plt.gca(),
- xlabel=plt.gca().get_xlabel(),
- title=title,
- legend=legend,
- figsize=figsize or (900, 400 + show * 50),
- plotname="plot_shap_bar",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model(max_one=True))
- def plot_shap_beeswarm(
- self,
- models: INT | str | MODEL | None = None,
- index: slice | SEQUENCE | None = None,
- show: INT | None = None,
- target: INT | str | tuple = 1,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> plt.Figure | None:
- """Plot SHAP's beeswarm plot.
-
- The plot is colored by feature values. Read more about SHAP
- plots in the [user guide][shap].
-
- Parameters
- ----------
- models: int, str, Model or None, default=None
- Model to plot. If None, all models are selected. Note that
- leaving the default option could raise an exception if there
- are multiple models. To avoid this, call the plot directly
- from a model, e.g. `atom.lr.plot_shap_beeswarm()`.
-
- index: tuple, slice or None, default=None
- Rows in the dataset to plot. If None, it selects all rows
- in the test set. The beeswarm plot does not support plotting
- a single sample.
-
- show: int or None, default=None
- Number of features (ordered by importance) to show. If
- None, it shows all features.
-
- target: int, str or tuple, default=1
- Class in the target column to target. For multioutput tasks,
- the value should be a tuple of the form (column, class).
- Note that for binary and multilabel tasks, the selected
- class is always the positive one.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of features shown.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as png. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [plt.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:PredictionPlot.plot_parshap
- atom.plots:ShapPlot.plot_shap_bar
- atom.plots:ShapPlot.plot_shap_scatter
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run("LR")
- atom.plot_shap_beeswarm(show=10)
- ```
-
- """
- rows = models.X.loc[models.branch._get_rows(index)]
- show = self._get_show(show, models)
- target = self.branch._get_target(target)
- explanation = models._shap.get_explanation(rows, target)
-
- self._get_figure(backend="matplotlib")
- check_canvas(BasePlot._fig.is_canvas, "plot_shap_beeswarm")
-
- shap.plots.beeswarm(explanation, max_display=show, show=False)
-
- BasePlot._fig.used_models.append(models)
- return self._plot(
- ax=plt.gca(),
- xlabel=plt.gca().get_xlabel(),
- title=title,
- legend=legend,
- figsize=figsize or (900, 400 + show * 50),
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model(max_one=True))
- def plot_shap_decision(
- self,
- models: INT | str | MODEL | None = None,
- index: SLICE | None = None,
- show: INT | None = None,
- target: INT | str | tuple = 1,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> plt.Figure | None:
- """Plot SHAP's decision plot.
-
- Visualize model decisions using cumulative SHAP values. Each
- plotted line explains a single model prediction. If a single
- prediction is plotted, feature values are printed in the
- plot (if supplied). If multiple predictions are plotted
- together, feature values will not be printed. Plotting too
- many predictions together will make the plot unintelligible.
- Read more about SHAP plots in the [user guide][shap].
-
- Parameters
- ----------
- models: int, str, Model or None, default=None
- Model to plot. If None, all models are selected. Note that
- leaving the default option could raise an exception if there
- are multiple models. To avoid this, call the plot directly
- from a model, e.g. `atom.lr.plot_shap_decision()`.
-
- index: int, str, slice, sequence or None, default=None
- Rows in the dataset to plot. If None, it selects all rows
- in the test set.
-
- show: int or None, default=None
- Number of features (ordered by importance) to show. If
- None, it shows all features.
-
- target: int, str or tuple, default=1
- Class in the target column to target. For multioutput tasks,
- the value should be a tuple of the form (column, class).
- Note that for binary and multilabel tasks, the selected
- class is always the positive one.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of features shown.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as png. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [plt.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:ShapPlot.plot_shap_bar
- atom.plots:ShapPlot.plot_shap_beeswarm
- atom.plots:ShapPlot.plot_shap_force
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run("LR")
- atom.plot_shap_decision(show=10)
- atom.plot_shap_decision(index=-1, show=10)
- ```
-
- """
- rows = models.X.loc[models.branch._get_rows(index)]
- show = self._get_show(show, models)
- target = self.branch._get_target(target)
- explanation = models._shap.get_explanation(rows, target)
-
- self._get_figure(backend="matplotlib")
- check_canvas(BasePlot._fig.is_canvas, "plot_shap_decision")
-
- shap.decision_plot(
- base_value=explanation.base_values,
- shap_values=explanation.values,
- features=rows,
- feature_display_range=slice(-1, -show - 1, -1),
- auto_size_plot=False,
- show=False,
- )
-
- BasePlot._fig.used_models.append(models)
- return self._plot(
- ax=plt.gca(),
- xlabel=plt.gca().get_xlabel(),
- title=title,
- legend=legend,
- figsize=figsize or (900, 400 + show * 50),
- plotname="plot_shap_decision",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model(max_one=True))
- def plot_shap_force(
- self,
- models: INT | str | MODEL | None = None,
- index: SLICE | None = None,
- target: INT | str | tuple = 1,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] = (900, 300),
- filename: str | None = None,
- display: bool | None = True,
- **kwargs,
- ) -> plt.Figure | None:
- """Plot SHAP's force plot.
-
- Visualize the given SHAP values with an additive force layout.
- Note that by default this plot will render using javascript.
- For a regular figure use `matplotlib=True` (this option is
- only available when only a single sample is plotted). Read more
- about SHAP plots in the [user guide][shap].
-
- Parameters
- ----------
- models: int, str, Model or None, default=None
- Model to plot. If None, all models are selected. Note that
- leaving the default option could raise an exception if there
- are multiple models. To avoid this, call the plot directly
- from a model, e.g. `atom.lr.plot_shap_force()`.
-
- index: int, str, slice, sequence or None, default=None
- Rows in the dataset to plot. If None, it selects all rows
- in the test set.
-
- target: int, str or tuple, default=1
- Class in the target column to target. For multioutput tasks,
- the value should be a tuple of the form (column, class).
- Note that for binary and multilabel tasks, the selected
- class is always the positive one.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple or None, default=(900, 300)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as png. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- **kwargs
- Additional keyword arguments for [shap.plots.force][force].
-
- Returns
- -------
- [plt.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:ShapPlot.plot_shap_beeswarm
- atom.plots:ShapPlot.plot_shap_scatter
- atom.plots:ShapPlot.plot_shap_decision
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run("LR")
- atom.plot_shap_force(index=-2, matplotlib=True, figsize=(1800, 300))
- ```
-
- """
- rows = models.X.loc[models.branch._get_rows(index)]
- target = self.branch._get_target(target)
- explanation = models._shap.get_explanation(rows, target)
-
- self._get_figure(create_figure=False, backend="matplotlib")
- check_canvas(BasePlot._fig.is_canvas, "plot_shap_force")
-
- plot = shap.force_plot(
- base_value=explanation.base_values,
- shap_values=explanation.values,
- features=rows,
- show=False,
- **kwargs,
- )
-
- if kwargs.get("matplotlib"):
- BasePlot._fig.used_models.append(models)
- return self._plot(
- fig=plt.gcf(),
- ax=plt.gca(),
- title=title,
- legend=legend,
- figsize=figsize,
- plotname="plot_shap_force",
- filename=filename,
- display=display,
- )
- else:
- if filename: # Save to a html file
- if not filename.endswith(".html"):
- filename += ".html"
- shap.save_html(filename, plot)
- if display and find_spec("IPython"):
- from IPython.display import display
-
- shap.initjs()
- display(plot)
-
- @composed(crash, plot_from_model(max_one=True))
- def plot_shap_heatmap(
- self,
- models: INT | str | MODEL | None = None,
- index: slice | SEQUENCE | None = None,
- show: INT | None = None,
- target: INT | str | tuple = 1,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> plt.Figure | None:
- """Plot SHAP's heatmap plot.
-
- This plot is designed to show the population substructure of a
- dataset using supervised clustering and a heatmap. Supervised
- clustering involves clustering data points not by their original
- feature values but by their explanations. Read more about SHAP
- plots in the [user guide][shap].
-
- Parameters
- ----------
- models: int, str, Model or None, default=None
- Model to plot. If None, all models are selected. Note that
- leaving the default option could raise an exception if there
- are multiple models. To avoid this, call the plot directly
- from a model, e.g. `atom.lr.plot_shap_heatmap()`.
-
- index: slice, sequence or None, default=None
- Rows in the dataset to plot. If None, it selects all rows
- in the test set. The plot_shap_heatmap method does not
- support plotting a single sample.
-
- show: int or None, default=None
- Number of features (ordered by importance) to show. If
- None, it shows all features.
-
- target: int, str or tuple, default=1
- Class in the target column to target. For multioutput tasks,
- the value should be a tuple of the form (column, class).
- Note that for binary and multilabel tasks, the selected
- class is always the positive one.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of features shown.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as png. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [plt.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:ShapPlot.plot_shap_decision
- atom.plots:ShapPlot.plot_shap_force
- atom.plots:ShapPlot.plot_shap_waterfall
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run("LR")
- atom.plot_shap_heatmap(show=10)
- ```
-
- """
- rows = models.X.loc[models.branch._get_rows(index)]
- show = self._get_show(show, models)
- target = self.branch._get_target(target)
- explanation = models._shap.get_explanation(rows, target)
-
- self._get_figure(backend="matplotlib")
- check_canvas(BasePlot._fig.is_canvas, "plot_shap_heatmap")
-
- shap.plots.heatmap(explanation, max_display=show, show=False)
-
- BasePlot._fig.used_models.append(models)
- return self._plot(
- ax=plt.gca(),
- xlabel=plt.gca().get_xlabel(),
- title=title,
- legend=legend,
- figsize=figsize or (900, 400 + show * 50),
- plotname="plot_shap_heatmap",
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model(max_one=True))
- def plot_shap_scatter(
- self,
- models: INT | str | MODEL | None = None,
- index: slice | SEQUENCE | None = None,
- columns: INT | str = 0,
- target: INT | str | tuple = 1,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] = (900, 600),
- filename: str | None = None,
- display: bool | None = True,
- ) -> plt.Figure | None:
- """Plot SHAP's scatter plot.
-
- Plots the value of the feature on the x-axis and the SHAP value
- of the same feature on the y-axis. This shows how the model
- depends on the given feature, and is like a richer extension of
- the classical partial dependence plots. Vertical dispersion of
- the data points represents interaction effects. Read more about
- SHAP plots in the [user guide][shap].
-
- Parameters
- ----------
- models: int, str, Model or None, default=None
- Model to plot. If None, all models are selected. Note that
- leaving the default option could raise an exception if there
- are multiple models. To avoid this, call the plot directly
- from a model, e.g. `atom.lr.plot_shap_scatter()`.
-
- index: slice, sequence or None, default=None
- Rows in the dataset to plot. If None, it selects all rows
- in the test set. The plot_shap_scatter method does not
- support plotting a single sample.
-
- columns: int or str, default=0
- Column to plot.
-
- target: int, str or tuple, default=1
- Class in the target column to target. For multioutput tasks,
- the value should be a tuple of the form (column, class).
- Note that for binary and multilabel tasks, the selected
- class is always the positive one.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple or None, default=(900, 600)
- Figure's size in pixels, format as (x, y).
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as png. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [plt.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:ShapPlot.plot_shap_beeswarm
- atom.plots:ShapPlot.plot_shap_decision
- atom.plots:ShapPlot.plot_shap_force
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run("LR")
- atom.plot_shap_scatter(columns="symmetry error")
- ```
-
- """
- rows = models.X.loc[models.branch._get_rows(index)]
- column = models.branch._get_columns(columns, include_target=False)[0]
- target = self.branch._get_target(target)
- explanation = models._shap.get_explanation(rows, target)
-
- # Get explanation for a specific column
- explanation = explanation[:, models.columns.get_loc(column)]
-
- self._get_figure(backend="matplotlib")
- check_canvas(BasePlot._fig.is_canvas, "plot_shap_scatter")
-
- shap.plots.scatter(explanation, color=explanation, ax=plt.gca(), show=False)
-
- BasePlot._fig.used_models.append(models)
- return self._plot(
- ax=plt.gca(),
- xlabel=plt.gca().get_xlabel(),
- ylabel=plt.gca().get_ylabel(),
- title=title,
- legend=legend,
- plotname="plot_shap_scatter",
- figsize=figsize,
- filename=filename,
- display=display,
- )
-
- @composed(crash, plot_from_model(max_one=True))
- def plot_shap_waterfall(
- self,
- models: INT | str | MODEL | None = None,
- index: INT | str | None = None,
- show: INT | None = None,
- target: INT | str | tuple = 1,
- *,
- title: str | dict | None = None,
- legend: str | dict | None = None,
- figsize: tuple[INT, INT] | None = None,
- filename: str | None = None,
- display: bool | None = True,
- ) -> plt.Figure | None:
- """Plot SHAP's waterfall plot.
-
- The SHAP value of a feature represents the impact of the
- evidence provided by that feature on the model’s output. The
- waterfall plot is designed to visually display how the SHAP
- values (evidence) of each feature move the model output from
- our prior expectation under the background data distribution,
- to the final model prediction given the evidence of all the
- features. Features are sorted by the magnitude of their SHAP
- values with the smallest magnitude features grouped together
- at the bottom of the plot when the number of features in the
- models exceeds the `show` parameter. Read more about SHAP plots
- in the [user guide][shap].
-
- Parameters
- ----------
- models: int, str, Model or None, default=None
- Model to plot. If None, all models are selected. Note that
- leaving the default option could raise an exception if there
- are multiple models. To avoid this, call the plot directly
- from a model, e.g. `atom.lr.plot_shap_waterfall()`.
-
- index: int, str or None, default=None
- Rows in the dataset to plot. If None, it selects all rows
- in the test set. The plot_shap_waterfall method does not
- support plotting multiple samples.
-
- show: int or None, default=None
- Number of features (ordered by importance) to show. If
- None, it shows all features.
-
- target: int, str or tuple, default=1
- Class in the target column to target. For multioutput tasks,
- the value should be a tuple of the form (column, class).
- Note that for binary and multilabel tasks, the selected
- class is always the positive one.
-
- title: str, dict or None, default=None
- Title for the plot.
-
- - If None, no title is shown.
- - If str, text for the title.
- - If dict, [title configuration][parameters].
-
- legend: str, dict or None, default=None
- Does nothing. Implemented for continuity of the API.
-
- figsize: tuple or None, default=None
- Figure's size in pixels, format as (x, y). If None, it
- adapts the size to the number of features shown.
-
- filename: str or None, default=None
- Save the plot using this name. Use "auto" for automatic
- naming. The type of the file depends on the provided name
- (.html, .png, .pdf, etc...). If `filename` has no file type,
- the plot is saved as png. If None, the plot is not saved.
-
- display: bool or None, default=True
- Whether to render the plot. If None, it returns the figure.
-
- Returns
- -------
- [plt.Figure][] or None
- Plot object. Only returned if `display=None`.
-
- See Also
- --------
- atom.plots:ShapPlot.plot_shap_bar
- atom.plots:ShapPlot.plot_shap_beeswarm
- atom.plots:ShapPlot.plot_shap_heatmap
-
- Examples
- --------
- ```pycon
- from atom import ATOMClassifier
- from sklearn.datasets import load_breast_cancer
-
- X, y = load_breast_cancer(return_X_y=True, as_frame=True)
-
- atom = ATOMClassifier(X, y, random_state=1)
- atom.run("LR")
- atom.plot_shap_waterfall(show=10)
- ```
-
- """
- rows = models.X.loc[[models.branch._get_rows(index)[0]]]
- show = self._get_show(show, models)
- target = self.branch._get_target(target)
- explanation = models._shap.get_explanation(rows, target)
-
- # Waterfall accepts only one row
- explanation.values = explanation.values[0]
- explanation.data = explanation.data[0]
-
- self._get_figure(backend="matplotlib")
- check_canvas(BasePlot._fig.is_canvas, "plot_shap_waterfall")
-
- shap.plots.waterfall(explanation, max_display=show, show=False)
-
- BasePlot._fig.used_models.append(models)
- return self._plot(
- ax=plt.gca(),
- title=title,
- legend=legend,
- figsize=figsize or (900, 400 + show * 50),
- plotname="plot_shap_waterfall",
- filename=filename,
- display=display,
- )
diff --git a/atom/plots/__init__.py b/atom/plots/__init__.py
new file mode 100644
index 000000000..765a2ac2e
--- /dev/null
+++ b/atom/plots/__init__.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+
+"""
+Automated Tool for Optimized Modelling (ATOM)
+Author: Mavs
+Description: Module for plots.
+
+"""
+
+from atom.plots.dataplot import DataPlot
+from atom.plots.featureselectionplot import FeatureSelectionPlot
+from atom.plots.hyperparametertuningplot import HyperparameterTuningPlot
+from atom.plots.predictionplot import PredictionPlot
+from atom.plots.shapplot import ShapPlot
+
+
+class ATOMPlot(
+ FeatureSelectionPlot,
+ DataPlot,
+ HyperparameterTuningPlot,
+ PredictionPlot,
+ ShapPlot,
+):
+ """Plot classes inherited by main ATOM classes."""
+ pass
+
+
+class RunnerPlot(HyperparameterTuningPlot, PredictionPlot, ShapPlot):
+ """Plot classes inherited by the runners and callable from models."""
+ pass
diff --git a/atom/plots/base.py b/atom/plots/base.py
new file mode 100644
index 000000000..7028c6ce9
--- /dev/null
+++ b/atom/plots/base.py
@@ -0,0 +1,1117 @@
+# -*- coding: utf-8 -*-
+
+"""
+Automated Tool for Optimized Modelling (ATOM)
+Author: Mavs
+Description: Module containing the base classes for plotting.
+
+"""
+
+from __future__ import annotations
+
+from contextlib import contextmanager
+from dataclasses import dataclass
+from itertools import cycle
+from typing import Literal
+
+import matplotlib.pyplot as plt
+import plotly.express as px
+import plotly.graph_objects as go
+from mlflow.tracking import MlflowClient
+from typeguard import typechecked
+
+from atom.utils.constants import PALETTE
+from atom.utils.types import (
+ BOOL, DATAFRAME, FLOAT, INDEX, INT, INT_TYPES, LEGEND, MODEL, SCALAR,
+ SEQUENCE,
+)
+from atom.utils.utils import (
+ composed, crash, divide, get_custom_scorer, lst, rnd, to_rgb,
+)
+
+
@dataclass
class Aesthetics:
    """Keeps track of plot aesthetics.

    Mutable container for the style attributes shared by all plots.
    The values are read and mutated through BasePlot's properties
    (e.g. `palette`, `title_fontsize`), which perform the validation.

    """

    palette: SEQUENCE  # Sequence of colors
    title_fontsize: INT  # Fontsize for titles
    label_fontsize: INT  # Fontsize for labels, legend and hoverinfo
    tick_fontsize: INT  # Fontsize for ticks
    line_width: INT  # Width of the line plots
    marker_size: INT  # Size of the markers
+
+
+@typechecked
+class BaseFigure:
+ """Base plotly figure.
+
+ The instance stores the position of the current axes in grid,
+ as well as the models used for the plot (to track in mlflow).
+
+ Parameters
+ ----------
+ rows: int, default=1
+ Number of subplot rows in the canvas.
+
+ cols: int, default=1
+ Number of subplot columns in the canvas.
+
+ horizontal_spacing: float, default=0.05
+ Space between subplot rows in normalized plot coordinates.
+ The spacing is relative to the figure's size.
+
+ vertical_spacing: float, default=0.07
+ Space between subplot cols in normalized plot coordinates.
+ The spacing is relative to the figure's size.
+
+ palette: str or sequence, default="Prism"
+ Name or color sequence for the palette.
+
+ is_canvas: bool, default=False
+ Whether the figure shows multiple plots.
+
+ backend: str, default="plotly"
+ Figure's backend. Choose between plotly or matplotlib.
+
+ create_figure: bool, default=True
+ Whether to create a new figure.
+
+ """
+
+ _marker = ["circle", "x", "diamond", "pentagon", "star", "hexagon"]
+ _dash = [None, "dashdot", "dash", "dot", "longdash", "longdashdot"]
+ _shape = ["", "/", "x", "\\", "-", "|", "+", "."]
+
+ def __init__(
+ self,
+ rows: INT = 1,
+ cols: INT = 1,
+ horizontal_spacing: FLOAT = 0.05,
+ vertical_spacing: FLOAT = 0.07,
+ palette: str | SEQUENCE = "Prism",
+ is_canvas: BOOL = False,
+ backend: Literal["plotly", "matplotlib"] = "plotly",
+ create_figure: BOOL = True,
+ ):
+ self.rows = rows
+ self.cols = cols
+ self.horizontal_spacing = horizontal_spacing
+ self.vertical_spacing = vertical_spacing
+ if isinstance(palette, str):
+ self._palette = getattr(px.colors.qualitative, palette)
+ self.palette = cycle(self._palette)
+ else:
+ # Convert color names or hex to rgb
+ self._palette = list(map(to_rgb, palette))
+ self.palette = cycle(self._palette)
+ self.is_canvas = is_canvas
+ self.backend = backend
+ self.create_figure = create_figure
+
+ self.idx = 0 # N-th plot in the canvas
+ self.axes = 0 # N-th axis in the canvas
+ if self.create_figure:
+ if self.backend == "plotly":
+ self.figure = go.Figure()
+ else:
+ self.figure, _ = plt.subplots()
+
+ self.groups = []
+ self.style = dict(palette={}, marker={}, dash={}, shape={})
+ self.marker = cycle(self._marker)
+ self.dash = cycle(self._dash)
+ self.shape = cycle(self._shape)
+
+ self.pos = {} # Subplot position to use for title
+ self.custom_layout = {} # Layout params specified by user
+ self.used_models = [] # Models plotted in this figure
+
+ # Perform parameter checks
+ if not 0 < horizontal_spacing < 1:
+ raise ValueError(
+ "Invalid value for the horizontal_spacing parameter. The "
+ f"value must lie between 0 and 1, got {horizontal_spacing}."
+ )
+
+ if not 0 < vertical_spacing < 1:
+ raise ValueError(
+ "Invalid value for the vertical_spacing parameter. The "
+ f"value must lie between 0 and 1, got {vertical_spacing}."
+ )
+
+ @property
+ def grid(self) -> tuple[INT, INT]:
+ """Position of the current axes on the grid.
+
+ Returns
+ -------
+ int
+ X-position.
+
+ int
+ Y-position.
+
+ """
+ return (self.idx - 1) // self.cols + 1, self.idx % self.cols or self.cols
+
+ @property
+ def next_subplot(self) -> go.Figure | plt.Figure | None:
+ """Increase the subplot index.
+
+ Returns
+ -------
+ go.Figure, plt.Figure or None
+ Current figure. Returns None if `create_figure=False`.
+
+ """
+ # Check if there are too many plots in the canvas
+ if self.idx >= self.rows * self.cols:
+ raise ValueError(
+ "Invalid number of plots in the canvas! Increase "
+ "the number of rows and cols to add more plots."
+ )
+ else:
+ self.idx += 1
+
+ if self.create_figure:
+ return self.figure
+
    def get_elem(
        self,
        name: SCALAR | str | None = None,
        element: Literal["palette", "marker", "dash", "shape"] = "palette",
    ) -> str | None:
        """Get the plot element for a specific name.

        This method is used to assign the same element (color, marker,
        etc...) to the same columns and models in a plot.

        Parameters
        ----------
        name: int, float or str or None, default=None
            Name for which to get the plot element. The name is stored in
            the element attributes to assign the same element to all calls
            with the same name. If None, return the first element.

        element: str, default="palette"
            Plot element to get. Choose from: palette, marker, dash, shape.

        Returns
        -------
        str or None
            Element code.

        """
        if name is None:
            return getattr(self, f"_{element}")[0]  # Get first element (default)
        elif name in self.style[element]:
            # Known name: return the stored element. Note that calling
            # setdefault directly here would still evaluate next(...) and
            # needlessly advance the cycle, hence the explicit check
            return self.style[element][name]
        else:
            # New name: draw the next element from the cycle and store it
            return self.style[element].setdefault(name, next(getattr(self, element)))
+
+ def showlegend(self, name: str, legend: LEGEND | dict | None) -> BOOL:
+ """Get whether the trace should be showed in the legend.
+
+ If there's already a trace with the same name, it's not
+ necessary to show it in the plot's legend.
+
+ Parameters
+ ----------
+ name: str
+ Name of the trace.
+
+ legend: str, dict or None
+ Legend parameter.
+
+ Returns
+ -------
+ bool
+ Whether the trace should be placed in the legend.
+
+ """
+ if name in self.groups:
+ return False
+ else:
+ self.groups.append(name)
+ return legend is not None
+
    def get_axes(
        self,
        x: tuple[SCALAR, SCALAR] = (0, 1),
        y: tuple[SCALAR, SCALAR] = (0, 1),
        coloraxis: dict | None = None,
    ) -> tuple[str, str]:
        """Create and update the plot's axes.

        Parameters
        ----------
        x: tuple
            Relative x-size of the plot.

        y: tuple
            Relative y-size of the plot.

        coloraxis: dict or None
            Properties of the coloraxis to create. None to ignore.

        Returns
        -------
        str
            Name of the x-axis.

        str
            Name of the y-axis.

        """
        self.axes += 1  # Axes numbering starts at 1

        # Calculate the distance between subplots
        x_offset = divide(self.horizontal_spacing, (self.cols - 1))
        y_offset = divide(self.vertical_spacing, (self.rows - 1))

        # Calculate the size of the subplot
        x_size = (1 - ((x_offset * 2) * (self.cols - 1))) / self.cols
        y_size = (1 - ((y_offset * 2) * (self.rows - 1))) / self.rows

        # Calculate the size of the axes
        ax_size = (x[1] - x[0]) * x_size
        ay_size = (y[1] - y[0]) * y_size

        # Determine the position for the axes
        x_pos = (self.grid[1] - 1) * (x_size + 2 * x_offset) + x[0] * x_size
        y_pos = (self.rows - self.grid[0]) * (y_size + 2 * y_offset) + y[0] * y_size

        # Store positions for subplot title
        self.pos[str(self.axes)] = (x_pos + ax_size / 2, rnd(y_pos + ay_size))

        # Update the figure with the new axes (domains are normalized
        # [0, 1] plot coordinates)
        self.figure.update_layout(
            {
                f"xaxis{self.axes}": dict(
                    domain=(x_pos, rnd(x_pos + ax_size)), anchor=f"y{self.axes}"
                ),
                f"yaxis{self.axes}": dict(
                    domain=(y_pos, rnd(y_pos + ay_size)), anchor=f"x{self.axes}"
                ),
            }
        )

        # Place a colorbar right of the axes
        if coloraxis:
            if title := coloraxis.pop("title", None):
                coloraxis["colorbar_title"] = dict(
                    text=title, side="right", font_size=coloraxis.pop("font_size")
                )

            coloraxis["colorbar_x"] = rnd(x_pos + ax_size) + ax_size / 40
            coloraxis["colorbar_xanchor"] = "left"
            coloraxis["colorbar_y"] = y_pos + ay_size / 2
            coloraxis["colorbar_yanchor"] = "middle"
            coloraxis["colorbar_len"] = ay_size * 0.9
            coloraxis["colorbar_thickness"] = ax_size * 30  # Default width in pixels
            self.figure.update_layout(
                {f"coloraxis{coloraxis.pop('axes', self.axes)}": coloraxis}
            )

        # Plotly's first axes carry no numeric suffix ("x", not "x1")
        xaxis = f"x{self.axes if self.axes > 1 else ''}"
        yaxis = f"y{self.axes if self.axes > 1 else ''}"
        return xaxis, yaxis
+
+
+@typechecked
+class BasePlot:
+ """Base class for all plotting methods.
+
+ This base class defines the properties that can be changed
+ to customize the plot's aesthetics.
+
+ """
+
+ _fig = None
+ _custom_layout = {}
+ _custom_traces = {}
+ _aesthetics = Aesthetics(
+ palette=list(PALETTE),
+ title_fontsize=24,
+ label_fontsize=16,
+ tick_fontsize=12,
+ line_width=2,
+ marker_size=8,
+ )
+
+ # Properties =================================================== >>
+
+ @property
+ def aesthetics(self) -> Aesthetics:
+ """All plot aesthetic attributes."""
+ return self._aesthetics
+
+ @aesthetics.setter
+ def aesthetics(self, value: dict):
+ self.palette = value.get("palette", self.palette)
+ self.title_fontsize = value.get("title_fontsize", self.title_fontsize)
+ self.label_fontsize = value.get("label_fontsize", self.label_fontsize)
+ self.tick_fontsize = value.get("tick_fontsize", self.tick_fontsize)
+ self.line_width = value.get("line_width", self.line_width)
+ self.marker_size = value.get("marker_size", self.marker_size)
+
+ @property
+ def palette(self) -> str | SEQUENCE:
+ """Color palette.
+
+ Specify one of plotly's [built-in palettes][palette] or create
+ a custom one, e.g. `atom.palette = ["red", "green", "blue"]`.
+
+ """
+ return self._aesthetics.palette
+
+ @palette.setter
+ def palette(self, value: str | SEQUENCE):
+ if isinstance(value, str) and not hasattr(px.colors.qualitative, value):
+ raise ValueError(
+ f"Invalid value for the palette parameter, got {value}. Choose "
+ f"from one of plotly's built-in qualitative color sequences in "
+ f"the px.colors.qualitative module or define your own sequence."
+ )
+
+ self._aesthetics.palette = value
+
+ @property
+ def title_fontsize(self) -> INT:
+ """Fontsize for the plot's title."""
+ return self._aesthetics.title_fontsize
+
+ @title_fontsize.setter
+ def title_fontsize(self, value: INT):
+ if value <= 0:
+ raise ValueError(
+ "Invalid value for the title_fontsize parameter. "
+ f"Value should be >=0, got {value}."
+ )
+
+ self._aesthetics.title_fontsize = value
+
+ @property
+ def label_fontsize(self) -> INT:
+ """Fontsize for the labels, legend and hover information."""
+ return self._aesthetics.label_fontsize
+
+ @label_fontsize.setter
+ def label_fontsize(self, value: INT):
+ if value <= 0:
+ raise ValueError(
+ "Invalid value for the label_fontsize parameter. "
+ f"Value should be >=0, got {value}."
+ )
+
+ self._aesthetics.label_fontsize = value
+
+ @property
+ def tick_fontsize(self) -> INT:
+ """Fontsize for the ticks along the plot's axes."""
+ return self._aesthetics.tick_fontsize
+
+ @tick_fontsize.setter
+ def tick_fontsize(self, value: INT):
+ if value <= 0:
+ raise ValueError(
+ "Invalid value for the tick_fontsize parameter. "
+ f"Value should be >=0, got {value}."
+ )
+
+ self._aesthetics.tick_fontsize = value
+
+ @property
+ def line_width(self) -> INT:
+ """Width of the line plots."""
+ return self._aesthetics.line_width
+
+ @line_width.setter
+ def line_width(self, value: INT):
+ if value <= 0:
+ raise ValueError(
+ "Invalid value for the line_width parameter. "
+ f"Value should be >=0, got {value}."
+ )
+
+ self._aesthetics.line_width = value
+
+ @property
+ def marker_size(self) -> INT:
+ """Size of the markers."""
+ return self._aesthetics.marker_size
+
+ @marker_size.setter
+ def marker_size(self, value: INT):
+ if value <= 0:
+ raise ValueError(
+ "Invalid value for the marker_size parameter. "
+ f"Value should be >=0, got {value}."
+ )
+
+ self._aesthetics.marker_size = value
+
+ # Methods ====================================================== >>
+
+ @staticmethod
+ def _get_plot_index(df: DATAFRAME) -> INDEX:
+ """Return the dataset's index in a plottable format.
+
+ Plotly does not accept all index formats (e.g. pd.Period),
+ thus use this utility method to convert to timestamp those
+ indices that can, else return as is.
+
+ Parameters
+ ----------
+ df: dataframe
+ Data set to get the index from.
+
+ Returns
+ -------
+ index
+ Index in an acceptable format.
+
+ """
+ if hasattr(df.index, "to_timestamp"):
+ return df.index.to_timestamp()
+ else:
+ return df.index
+
+ @staticmethod
+ def _get_show(show: INT | None, model: MODEL | list[MODEL]) -> INT:
+ """Check and return the number of features to show.
+
+ Parameters
+ ----------
+ show: int or None
+ Number of features to show. If None, select all (max 200).
+
+ model: Model or list
+ Models from which to get the features.
+
+ Returns
+ -------
+ int
+ Number of features to show.
+
+ """
+ max_fxs = max(m.n_features for m in lst(model))
+ if show is None or show > max_fxs:
+ # Limit max features shown to avoid maximum figsize error
+ show = min(200, max_fxs)
+ elif show < 1:
+ raise ValueError(
+ f"Invalid value for the show parameter. Value should be >0, got {show}."
+ )
+
+ return show
+
    @staticmethod
    def _get_hyperparams(
        params: str | slice | SEQUENCE | None,
        model: MODEL,
    ) -> list[str]:
        """Check and return a model's hyperparameters.

        Parameters
        ----------
        params: str, slice, sequence or None
            Hyperparameters to get. Use a sequence or add `+` between
            options to select more than one. If None, all the model's
            hyperparameters are selected.

        model: Model
            Get the params from this model.

        Returns
        -------
        list of str
            Selected hyperparameters.

        """
        if params is None:
            hyperparameters = list(model._ht["distributions"])
        elif isinstance(params, slice):
            hyperparameters = list(model._ht["distributions"])[params]
        else:
            hyperparameters = []
            for param in lst(params):
                if isinstance(param, INT_TYPES):
                    # Integers index into the tuned distributions
                    hyperparameters.append(list(model._ht["distributions"])[param])
                elif isinstance(param, str):
                    # Strings may select multiple names joined with "+"
                    for p in param.split("+"):
                        if p not in model._ht["distributions"]:
                            raise ValueError(
                                "Invalid value for the params parameter. "
                                f"Hyperparameter {p} was not used during the "
                                f"optimization of model {model.name}."
                            )
                        else:
                            hyperparameters.append(p)

        if not hyperparameters:
            raise ValueError(f"Didn't find any hyperparameters for model {model.name}.")

        return hyperparameters
+
    def _get_metric(
        self,
        metric: INT | str | SEQUENCE | None,
        max_one: BOOL,
    ) -> INT | str | list[INT | str]:
        """Check and return the provided metric index.

        Parameters
        ----------
        metric: int, str, sequence or None
            Metric to retrieve. If None, all metrics are returned.

        max_one: bool
            Whether one or multiple metrics are allowed.

        Returns
        -------
        int, str or list
            Position index of the metric, or the name of a time metric
            (e.g. "time_fit"). If `max_one=False`, returns a list of
            metric positions/names.

        """
        if metric is None:
            return list(range(len(self._metric)))
        else:
            inc = []
            for met in lst(metric):
                if isinstance(met, INT_TYPES):
                    if 0 <= met < len(self._metric):
                        inc.append(met)
                    else:
                        raise ValueError(
                            f"Invalid value for the metric parameter. Value {met} is out "
                            f"of range for a pipeline with {len(self._metric)} metrics."
                        )
                elif isinstance(met, str):
                    met = met.lower()
                    for m in met.split("+"):
                        # Time metrics are kept by name (not part of _metric)
                        if m in ("time_ht", "time_fit", "time_bootstrap", "time"):
                            inc.append(m)
                        # NOTE(review): membership is checked against self.metric
                        # but the index is taken from self._metric — presumably
                        # these are aligned; verify
                        elif (name := get_custom_scorer(m).name) in self.metric:
                            inc.append(self._metric.index(name))
                        else:
                            raise ValueError(
                                "Invalid value for the metric parameter. The "
                                f"{name} metric wasn't used to fit the models."
                            )

            if len(inc) > 1 and max_one:
                raise ValueError(
                    "Invalid value for the metric parameter. "
                    f"Only one metric is allowed, got {inc}."
                )

            return inc[0] if max_one else inc
+
+ def _get_set(
+ self,
+ dataset: str | SEQUENCE,
+ max_one: BOOL,
+ allow_holdout: BOOL = True,
+ ) -> str | list[str]:
+ """Check and return the provided data set.
+
+ Parameters
+ ----------
+ dataset: str or sequence
+ Name(s) of the data set to retrieve.
+
+ max_one: bool
+ Whether one or multiple data sets are allowed. If True, return
+ the data set instead of a list.
+
+ allow_holdout: bool, default=True
+ Whether to allow the retrieval of the holdout set.
+
+ Returns
+ -------
+ str or list
+ Selected data set(s).
+
+ """
+ for ds in (dataset := "+".join(lst(dataset)).lower().split("+")):
+ if ds == "holdout":
+ if allow_holdout:
+ if self.holdout is None:
+ raise ValueError(
+ "Invalid value for the dataset parameter. No holdout "
+ "data set was specified when initializing the instance."
+ )
+ else:
+ raise ValueError(
+ "Invalid value for the dataset parameter, got "
+ f"{ds}. Choose from: train, test."
+ )
+ elif ds not in ("train", "test"):
+ raise ValueError(
+ "Invalid value for the dataset parameter, got {ds}. "
+ f"Choose from: train, test{', holdout' if allow_holdout else ''}."
+ )
+
+ if max_one and len(dataset) > 1:
+ raise ValueError(
+ "Invalid value for the dataset parameter, got "
+ f"{dataset}. Only one data set is allowed."
+ )
+
+ return dataset[0] if max_one else dataset
+
+ def _get_figure(self, **kwargs) -> go.Figure | plt.Figure | None:
+ """Return existing figure if in canvas, else a new figure.
+
+ Every time this method is called from a canvas, the plot
+ index is raised by one to keep track in which subplot the
+ BaseFigure is at.
+
+ Parameters
+ ----------
+ **kwargs
+ Additional keyword arguments for BaseFigure.
+
+ Returns
+ -------
+ [go.Figure][], [plt.Figure][] or None
+ Existing figure or newly created. Returns None if kwarg
+ `create_figure=False`.
+
+ """
+ if BasePlot._fig and BasePlot._fig.is_canvas:
+ return BasePlot._fig.next_subplot
+ else:
+ BasePlot._fig = BaseFigure(palette=self.palette, **kwargs)
+ return BasePlot._fig.next_subplot
+
+ def _draw_line(
+ self,
+ parent: str,
+ child: str | None = None,
+ legend: str | dict = None,
+ **kwargs,
+ ) -> go.Scatter:
+ """Draw a line.
+
+ Unify the style to draw a line, where parent and child
+ (e.g. model - data set or column - distribution) keep the
+ same style (color or dash). A legendgroup title is only added
+ when there is a child element.
+
+ Parameters
+ ----------
+ parent: str
+ Name of the model.
+
+ child: str or None, default=None
+ Data set which is plotted.
+
+ legend: str, dict or None
+ Legend argument provided by the user.
+
+ **kwargs
+ Additional keyword arguments for the trace.
+
+ Returns
+ -------
+ go.Scatter
+ New trace to add to figure.
+
+ """
+ legendgrouptitle = dict(text=parent, font_size=self.label_fontsize)
+ hover = f"(%{{x}}, %{{y}}){parent}{f' - {child}' if child else ''}"
+ return go.Scatter(
+ line=dict(
+ width=self.line_width,
+ color=BasePlot._fig.get_elem(parent),
+ dash=BasePlot._fig.get_elem(child, "dash"),
+ ),
+ marker=dict(
+ symbol=BasePlot._fig.get_elem(child, "marker"),
+ size=self.marker_size,
+ color=BasePlot._fig.get_elem(parent),
+ line=dict(width=1, color="rgba(255, 255, 255, 0.9)"),
+ ),
+ hovertemplate=kwargs.pop("hovertemplate", hover),
+ name=kwargs.pop("name", child or parent),
+ legendgroup=kwargs.pop("legendgroup", parent),
+ legendgrouptitle=legendgrouptitle if child else None,
+ showlegend=BasePlot._fig.showlegend(f"{parent}-{child}", legend),
+ **kwargs,
+ )
+
    @staticmethod
    def _draw_straight_line(y: SCALAR | str, xaxis: str, yaxis: str):
        """Draw a line across the axis.

        The line can be either horizontal or diagonal. The line should
        be used as reference. It's not added to the legend and doesn't
        show any information on hover.

        Parameters
        ----------
        y: int, float or str
            Coordinates on the y-axis. If a value, draw a horizontal line
            at that value. If "diagonal", draw a diagonal line from x.

        xaxis: str
            Name of the x-axis to draw in.

        yaxis: str
            Name of the y-axis to draw in.

        """
        # x is always expressed in domain coordinates (0 to 1); y uses
        # domain coordinates only for the diagonal, else data coordinates
        BasePlot._fig.figure.add_shape(
            type="line",
            x0=0,
            x1=1,
            y0=0 if y == "diagonal" else y,
            y1=1 if y == "diagonal" else y,
            xref=f"{xaxis} domain",
            yref=f"{yaxis} domain" if y == "diagonal" else yaxis,
            line=dict(width=1, color="black", dash="dash"),
            opacity=0.6,
            layer="below",
        )
+
    def _plot(
        self,
        fig: go.Figure | plt.Figure | None = None,
        ax: plt.Axes | tuple[str, str] | None = None,
        **kwargs,
    ) -> go.Figure | plt.Figure | None:
        """Make the plot.

        Customize the axes to the default layout and plot the figure
        if it's not part of a canvas.

        Parameters
        ----------
        fig: go.Figure, plt.Figure or None
            Current figure. If None, use `plt.gcf()`.

        ax: plt.Axes, tuple or None, default=None
            Axis object or names of the axes to update. If None, ignore
            their update.

        **kwargs
            Keyword arguments containing the figure's parameters.

            - title: Name of the title or custom configuration.
            - legend: Whether to show the legend or custom configuration.
            - xlabel: Label for the x-axis.
            - ylabel: Label for the y-axis.
            - xlim: Limits for the x-axis.
            - ylim: Limits for the y-axis.
            - figsize: Size of the figure.
            - filename: Name of the saved file.
            - plotname: Name of the plot.
            - display: Whether to show the plot. If None, return the figure.

        Returns
        -------
        plt.Figure, go.Figure or None
            Created figure. Only returned if `display=None`.

        """
        # Set name with which to save the file
        if kwargs.get("filename"):
            if kwargs["filename"].endswith("auto"):
                name = kwargs["filename"].replace("auto", kwargs["plotname"])
            else:
                name = kwargs["filename"]
        else:
            name = kwargs.get("plotname")

        fig = fig or BasePlot._fig.figure
        if BasePlot._fig.backend == "plotly":
            # Plotly backend
            if ax:
                fig.update_layout(
                    {
                        f"{ax[0]}_title": dict(
                            text=kwargs.get("xlabel"), font_size=self.label_fontsize
                        ),
                        f"{ax[1]}_title": dict(
                            text=kwargs.get("ylabel"), font_size=self.label_fontsize
                        ),
                        f"{ax[0]}_range": kwargs.get("xlim"),
                        f"{ax[1]}_range": kwargs.get("ylim"),
                        f"{ax[0]}_automargin": True,
                        f"{ax[1]}_automargin": True,
                    }
                )

                if BasePlot._fig.is_canvas and (title := kwargs.get("title")):
                    # Add a subtitle to a plot in the canvas
                    default_title = {
                        "x": BasePlot._fig.pos[ax[0][5:] or "1"][0],
                        "y": BasePlot._fig.pos[ax[0][5:] or "1"][1] + 0.005,
                        "xref": "paper",
                        "yref": "paper",
                        "xanchor": "center",
                        "yanchor": "bottom",
                        "showarrow": False,
                        "font_size": self.title_fontsize - 4,
                    }

                    if isinstance(title, dict):
                        title = {**default_title, **title}
                    else:
                        title = {"text": title, **default_title}

                    fig.update_layout(dict(annotations=fig.layout.annotations + (title,)))

            # Outside a canvas, finalize the layout (title, legend, size)
            if not BasePlot._fig.is_canvas and kwargs.get("plotname"):
                default_title = dict(
                    x=0.5,
                    y=1,
                    pad=dict(t=15, b=15),
                    xanchor="center",
                    yanchor="top",
                    xref="paper",
                    font_size=self.title_fontsize,
                )
                if isinstance(title := kwargs.get("title"), dict):
                    title = {**default_title, **title}
                else:
                    title = {"text": title, **default_title}

                default_legend = dict(
                    traceorder="grouped",
                    groupclick=kwargs.get("groupclick", "toggleitem"),
                    font_size=self.label_fontsize,
                    bgcolor="rgba(255, 255, 255, 0.5)",
                )
                if isinstance(legend := kwargs.get("legend"), str):
                    position = {}
                    if legend == "upper left":
                        position = dict(x=0.01, y=0.99, xanchor="left", yanchor="top")
                    elif legend == "lower left":
                        position = dict(x=0.01, y=0.01, xanchor="left", yanchor="bottom")
                    elif legend == "upper right":
                        position = dict(x=0.99, y=0.99, xanchor="right", yanchor="top")
                    elif legend == "lower right":
                        position = dict(x=0.99, y=0.01, xanchor="right", yanchor="bottom")
                    elif legend == "upper center":
                        position = dict(x=0.5, y=0.99, xanchor="center", yanchor="top")
                    elif legend == "lower center":
                        position = dict(x=0.5, y=0.01, xanchor="center", yanchor="bottom")
                    elif legend == "center left":
                        position = dict(x=0.01, y=0.5, xanchor="left", yanchor="middle")
                    elif legend == "center right":
                        position = dict(x=0.99, y=0.5, xanchor="right", yanchor="middle")
                    elif legend == "center":
                        position = dict(x=0.5, y=0.5, xanchor="center", yanchor="middle")
                    legend = {**default_legend, **position}
                elif isinstance(legend, dict):
                    legend = {**default_legend, **legend}

                # Update layout with predefined settings
                space1 = self.title_fontsize if title.get("text") else 10
                space2 = self.title_fontsize * int(bool(fig.layout.annotations))
                fig.update_layout(
                    title=title,
                    legend=legend,
                    showlegend=bool(kwargs.get("legend")),
                    hoverlabel=dict(font_size=self.label_fontsize),
                    font_size=self.tick_fontsize,
                    margin=dict(l=50, b=50, r=0, t=25 + space1 + space2, pad=0),
                    width=kwargs["figsize"][0],
                    height=kwargs["figsize"][1],
                )

                # Update plot with custom settings
                fig.update_traces(**self._custom_traces)
                fig.update_layout(**self._custom_layout)

                if kwargs.get("filename"):
                    # Filenames without an extension are saved as html
                    if "." not in name or name.endswith(".html"):
                        fig.write_html(name if "." in name else name + ".html")
                    else:
                        fig.write_image(name)

                # Log plot to mlflow run of every model visualized
                if getattr(self, "experiment", None) and self.log_plots:
                    for m in set(BasePlot._fig.used_models):
                        MlflowClient().log_figure(
                            run_id=m._run.info.run_id,
                            figure=fig,
                            artifact_file=name if "." in name else f"{name}.html",
                        )

                if kwargs.get("display") is True:
                    fig.show()
                elif kwargs.get("display") is None:
                    return fig

        else:
            # Matplotlib backend
            # NOTE(review): the title/label setters assume ax is not None;
            # only tick_params is guarded — confirm callers always pass ax
            # when they pass title/xlabel/ylabel
            if kwargs.get("title"):
                ax.set_title(kwargs.get("title"), fontsize=self.title_fontsize, pad=20)
            if kwargs.get("xlabel"):
                ax.set_xlabel(kwargs["xlabel"], fontsize=self.label_fontsize, labelpad=12)
            if kwargs.get("ylabel"):
                ax.set_ylabel(kwargs["ylabel"], fontsize=self.label_fontsize, labelpad=12)
            if ax is not None:
                ax.tick_params(axis="both", labelsize=self.tick_fontsize)

            if kwargs.get("figsize"):
                # Convert from pixels to inches
                fig.set_size_inches(
                    kwargs["figsize"][0] // fig.get_dpi(),
                    kwargs["figsize"][1] // fig.get_dpi(),
                )
            plt.tight_layout()
            if kwargs.get("filename"):
                fig.savefig(name)

            # Log plot to mlflow run of every model visualized
            # NOTE(review): the plotly branch guards with
            # getattr(self, "experiment", None); consider the same here
            if self.experiment and self.log_plots:
                for m in set(BasePlot._fig.used_models):
                    MlflowClient().log_figure(
                        run_id=m._run.info.run_id,
                        figure=fig,
                        artifact_file=name if "." in name else f"{name}.png",
                    )

            plt.show() if kwargs.get("display") else plt.close()
            if kwargs.get("display") is None:
                return fig
+
    @composed(contextmanager, crash)
    def canvas(
        self,
        rows: INT = 1,
        cols: INT = 2,
        *,
        horizontal_spacing: FLOAT = 0.05,
        vertical_spacing: FLOAT = 0.07,
        title: str | dict | None = None,
        legend: str | dict | None = "out",
        figsize: tuple[INT, INT] | None = None,
        filename: str | None = None,
        display: BOOL = True,
    ):
        """Create a figure with multiple plots.

        This `@contextmanager` allows you to draw many plots in one
        figure. The default option is to add two plots side by side.
        See the [user guide][canvas] for an example.

        Parameters
        ----------
        rows: int, default=1
            Number of plots in length.

        cols: int, default=2
            Number of plots in width.

        horizontal_spacing: float, default=0.05
            Space between subplot rows in normalized plot coordinates.
            The spacing is relative to the figure's size.

        vertical_spacing: float, default=0.07
            Space between subplot cols in normalized plot coordinates.
            The spacing is relative to the figure's size.

        title: str, dict or None, default=None
            Title for the plot.

            - If None, no title is shown.
            - If str, text for the title.
            - If dict, [title configuration][parameters].

        legend: str, dict or None, default="out"
            Legend for the plot. See the [user guide][parameters] for
            an extended description of the choices.

            - If None: No legend is shown.
            - If str: Location where to show the legend.
            - If dict: Legend configuration.

        figsize: tuple or None, default=None
            Figure's size in pixels, format as (x, y). If None, it
            adapts the size to the number of plots in the canvas.

        filename: str or None, default=None
            Save the plot using this name. Use "auto" for automatic
            naming. The type of the file depends on the provided name
            (.html, .png, .pdf, etc...). If `filename` has no file type,
            the plot is saved as html. If None, the plot is not saved.

        display: bool, default=True
            Whether to render the plot.

        Yields
        ------
        [go.Figure][]
            Plot object.

        """
        BasePlot._fig = BaseFigure(
            rows=rows,
            cols=cols,
            horizontal_spacing=horizontal_spacing,
            vertical_spacing=vertical_spacing,
            palette=self.palette,
            is_canvas=True,
        )

        try:
            yield BasePlot._fig.figure
        finally:
            # Finalize the figure even if the body raised
            BasePlot._fig.is_canvas = False  # Close the canvas
            self._plot(
                groupclick="togglegroup",
                title=title,
                legend=legend,
                figsize=figsize or (550 + 350 * cols, 200 + 400 * rows),
                plotname="canvas",
                filename=filename,
                display=display,
            )
+
+ def reset_aesthetics(self):
+ """Reset the plot [aesthetics][] to their default values."""
+ self._custom_layout = {}
+ self._custom_traces = {}
+ self._aesthetics = Aesthetics(
+ palette=PALETTE,
+ title_fontsize=24,
+ label_fontsize=16,
+ tick_fontsize=12,
+ line_width=2,
+ marker_size=8,
+ )
+
    def update_layout(self, **kwargs):
        """Update the properties of the plot's layout.

        Recursively update the structure of the original layout with
        the values in the arguments.

        Parameters
        ----------
        **kwargs
            Keyword arguments for the figure's [update_layout][] method.

        """
        # Replaces (doesn't merge with) any previously stored settings
        self._custom_layout = kwargs
+
    def update_traces(self, **kwargs):
        """Update the properties of the plot's traces.

        Recursively update the structure of the original traces with
        the values in the arguments.

        Parameters
        ----------
        **kwargs
            Keyword arguments for the figure's [update_traces][] method.

        """
        # Replaces (doesn't merge with) any previously stored settings
        self._custom_traces = kwargs
diff --git a/atom/plots/dataplot.py b/atom/plots/dataplot.py
new file mode 100644
index 000000000..105e7cd6d
--- /dev/null
+++ b/atom/plots/dataplot.py
@@ -0,0 +1,985 @@
+# -*- coding: utf-8 -*-
+
+"""
+Automated Tool for Optimized Modelling (ATOM)
+Author: Mavs
+Description: Module containing the DataPlot class.
+
+"""
+
+from __future__ import annotations
+
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+from nltk.collocations import (
+ BigramCollocationFinder, QuadgramCollocationFinder,
+ TrigramCollocationFinder,
+)
+from scipy import stats
+from typeguard import typechecked
+
+from atom.plots.base import BasePlot
+from atom.utils.constants import PALETTE
+from atom.utils.types import INT, LEGEND, SEQUENCE, SERIES, SLICE
+from atom.utils.utils import (
+ check_dependency, crash, divide, get_corpus, lst, rnd,
+)
+
+
+@typechecked
+class DataPlot(BasePlot):
+ """Data plots.
+
+ Plots used for understanding and interpretation of the dataset.
+ They are only accessible from atom. The other runners should
+ be used for model training only, not for data manipulation.
+
+ """
+
+ @crash
+ def plot_correlation(
+ self,
+ columns: slice | SEQUENCE | None = None,
+ method: str = "pearson",
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] = (800, 700),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot a correlation matrix.
+
+ Displays a heatmap showing the correlation between columns in
+ the dataset. The colors red, blue and white stand for positive,
+ negative, and no correlation respectively.
+
+ Parameters
+ ----------
+ columns: slice, sequence or None, default=None
+ Columns to plot. If None, plot all columns in the dataset.
+ Selected categorical columns are ignored.
+
+ method: str, default="pearson"
+ Method of correlation. Choose from: pearson, kendall or
+ spearman.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple, default=(800, 700)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:DataPlot.plot_distribution
+ atom.plots:DataPlot.plot_qq
+ atom.plots:DataPlot.plot_relationships
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.plot_correlation()
+ ```
+
+ """
+ columns = self.branch._get_columns(columns, only_numerical=True)
+ if method.lower() not in ("pearson", "kendall", "spearman"):
+ raise ValueError(
+ f"Invalid value for the method parameter, got {method}. "
+ "Choose from: pearson, kendall or spearman."
+ )
+
+ # Compute the correlation matrix
+ corr = self.dataset[columns].corr(method=method.lower())
+
+ # Generate a mask for the lower triangle
+ # k=1 means keep outermost diagonal line
+ mask = np.zeros_like(corr, dtype=bool)
+ mask[np.triu_indices_from(mask, k=1)] = True
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes(
+ x=(0, 0.87),
+ coloraxis=dict(
+ colorscale="rdbu_r",
+ cmin=-1,
+ cmax=1,
+ title=f"{method.lower()} correlation",
+ font_size=self.label_fontsize,
+ ),
+ )
+
+ fig.add_trace(
+ go.Heatmap(
+ z=corr.mask(mask),
+ x=columns,
+ y=columns,
+ coloraxis=f"coloraxis{xaxis[1:]}",
+ hovertemplate="x:%{x}<br>y:%{y}<br>z:%{z}",
+ hoverongaps=False,
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ fig.update_layout(
+ {
+ "template": "plotly_white",
+ f"yaxis{yaxis[1:]}_autorange": "reversed",
+ f"xaxis{xaxis[1:]}_showgrid": False,
+ f"yaxis{yaxis[1:]}_showgrid": False,
+ }
+ )
+
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_correlation",
+ filename=filename,
+ display=display,
+ )
+
+ @crash
+ def plot_distribution(
+ self,
+ columns: SLICE = 0,
+ distributions: str | SEQUENCE | None = None,
+ show: INT | None = None,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "upper right",
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot column distributions.
+
+ - For numerical columns, plot the probability density
+ distribution. Additionally, it's possible to plot any of
+ `scipy.stats` distributions fitted to the column.
+ - For categorical columns, plot the class distribution.
+ Only one categorical column can be plotted at the same time.
+
+ !!! tip
+ Use atom's [distribution][atomclassifier-distribution]
+ method to check which distribution fits the column best.
+
+ Parameters
+ ----------
+ columns: int, str, slice or sequence, default=0
+ Columns to plot. It's only possible to plot one categorical
+ column. If more than one categorical columns are selected,
+ all categorical columns are ignored.
+
+ distributions: str, sequence or None, default=None
+ Names of the `scipy.stats` distributions to fit to the
+ columns. If None, a [Gaussian kde distribution][kde] is
+ shown. Only for numerical columns.
+
+ show: int or None, default=None
+ Number of classes (ordered by number of occurrences) to
+ show in the plot. If None, it shows all classes. Only for
+ categorical columns.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None: No title is shown.
+ - If str: Text for the title.
+ - If dict: [title configuration][parameters].
+
+ legend: str, dict or None, default="upper right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the plot's type.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:DataPlot.plot_correlation
+ atom.plots:DataPlot.plot_qq
+ atom.plots:DataPlot.plot_relationships
+
+ Examples
+ --------
+ ```pycon
+ import numpy as np
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ # Add a categorical feature
+ animals = ["cat", "dog", "bird", "lion", "zebra"]
+ probabilities = [0.001, 0.1, 0.2, 0.3, 0.399]
+ X["animals"] = np.random.choice(animals, size=len(X), p=probabilities)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.plot_distribution(columns=[0, 1])
+ atom.plot_distribution(columns=0, distributions=["norm", "invgauss"])
+ atom.plot_distribution(columns="animals")
+ ```
+
+ """
+ columns = self.branch._get_columns(columns)
+ cat_columns = list(self.dataset.select_dtypes(exclude="number").columns)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+
+ if len(columns) == 1 and columns[0] in cat_columns:
+ series = self.dataset[columns[0]].value_counts(ascending=True)
+
+ if show is None or show > len(series):
+ show = len(series)
+ elif show < 1:
+ raise ValueError(
+ "Invalid value for the show parameter. "
+ f"Value should be >0, got {show}."
+ )
+
+ color = BasePlot._fig.get_elem()
+ fig.add_trace(
+ go.Bar(
+ x=series,
+ y=series.index,
+ orientation="h",
+ marker=dict(
+ color=f"rgba({color[4:-1]}, 0.2)",
+ line=dict(width=2, color=color),
+ ),
+ hovertemplate="%{x}",
+ name=f"{columns[0]}: {len(series)} classes",
+ showlegend=BasePlot._fig.showlegend("dist", legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Counts",
+ ylim=(len(series) - show - 0.5, len(series) - 0.5),
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 400 + show * 50),
+ plotname="plot_distribution",
+ filename=filename,
+ display=display,
+ )
+
+ else:
+ for col in [c for c in columns if c not in cat_columns]:
+ fig.add_trace(
+ go.Histogram(
+ x=self.dataset[col],
+ histnorm="probability density",
+ marker=dict(
+ color=f"rgba({BasePlot._fig.get_elem(col)[4:-1]}, 0.2)",
+ line=dict(width=2, color=BasePlot._fig.get_elem(col)),
+ ),
+ nbinsx=40,
+ name="dist",
+ legendgroup=col,
+ legendgrouptitle=dict(text=col, font_size=self.label_fontsize),
+ showlegend=BasePlot._fig.showlegend(f"{col}-dist", legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ x = np.linspace(self.dataset[col].min(), self.dataset[col].max(), 200)
+
+ # Drop missing values for compatibility with scipy.stats
+ missing = self.missing + [np.inf, -np.inf]
+ values = self.dataset[col].replace(missing, np.NaN).dropna()
+
+ if distributions:
+ # Get a line for each distribution
+ for j, dist in enumerate(lst(distributions)):
+ params = getattr(stats, dist).fit(values)
+
+ fig.add_trace(
+ self._draw_line(
+ x=x,
+ y=getattr(stats, dist).pdf(x, *params),
+ parent=col,
+ child=dist,
+ legend=legend,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+ else:
+ # If no distributions specified, draw Gaussian kde
+ fig.add_trace(
+ self._draw_line(
+ x=x,
+ y=stats.gaussian_kde(values)(x),
+ parent=col,
+ child="kde",
+ legend=legend,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ fig.update_layout(dict(barmode="overlay"))
+
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Values",
+ ylabel="Probability density",
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 600),
+ plotname="plot_distribution",
+ filename=filename,
+ display=display,
+ )
+
+ @crash
+ def plot_ngrams(
+ self,
+ ngram: INT | str = "bigram",
+ index: SLICE | None = None,
+ show: INT = 10,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower right",
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot n-gram frequencies.
+
+ The text for the plot is extracted from the column named
+ `corpus`. If there is no column with that name, an exception
+ is raised. If the documents are not tokenized, the words are
+ separated by spaces.
+
+ !!! tip
+ Use atom's [tokenize][atomclassifier-tokenize] method to
+ separate the words creating n-grams based on their frequency
+ in the corpus.
+
+ Parameters
+ ----------
+ ngram: str or int, default="bigram"
+ Number of contiguous words to search for (size of n-gram).
+ Choose from: words (1), bigrams (2), trigrams (3),
+ quadgrams (4).
+
+ index: int, str, slice, sequence or None, default=None
+ Documents in the corpus to include in the search. If None,
+ it selects all documents in the dataset.
+
+ show: int, default=10
+ Number of n-grams (ordered by number of occurrences) to
+ show in the plot.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the number of n-grams shown.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:DataPlot.plot_wordcloud
+
+ Examples
+ --------
+ ```pycon
+ import numpy as np
+ from atom import ATOMClassifier
+ from sklearn.datasets import fetch_20newsgroups
+
+ X, y = fetch_20newsgroups(
+ return_X_y=True,
+ categories=["alt.atheism", "sci.med", "comp.windows.x"],
+ shuffle=True,
+ random_state=1,
+ )
+ X = np.array(X).reshape(-1, 1)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.textclean()
+ atom.textnormalize()
+ atom.plot_ngrams()
+ ```
+
+ """
+
+ def get_text(column: SERIES) -> SERIES:
+ """Get the complete corpus as sequence of tokens.
+
+ Parameters
+ ----------
+ column: series
+ Column containing the corpus.
+
+ Returns
+ -------
+ series
+ Corpus of tokens.
+
+ """
+ if isinstance(column.iat[0], str):
+ return column.apply(lambda row: row.split())
+ else:
+ return column
+
+ corpus = get_corpus(self.X)
+ rows = self.dataset.loc[self.branch._get_rows(index, return_test=False)]
+
+ if str(ngram).lower() in ("1", "word", "words"):
+ ngram = "words"
+ series = pd.Series(
+ [word for row in get_text(rows[corpus]) for word in row]
+ ).value_counts(ascending=True)
+ else:
+ if str(ngram).lower() in ("2", "bigram", "bigrams"):
+ ngram, finder = "bigrams", BigramCollocationFinder
+ elif str(ngram).lower() in ("3", "trigram", "trigrams"):
+ ngram, finder = "trigrams", TrigramCollocationFinder
+ elif str(ngram).lower() in ("4", "quadgram", "quadgrams"):
+ ngram, finder = "quadgrams", QuadgramCollocationFinder
+ else:
+ raise ValueError(
+ f"Invalid value for the ngram parameter, got {ngram}. "
+ "Choose from: words, bigram, trigram, quadgram."
+ )
+
+ ngram_fd = finder.from_documents(get_text(rows[corpus])).ngram_fd
+ series = pd.Series(
+ data=[x[1] for x in ngram_fd.items()],
+ index=[" ".join(x[0]) for x in ngram_fd.items()],
+ ).sort_values(ascending=True)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+
+ fig.add_trace(
+ go.Bar(
+ x=(data := series[-show:]),
+ y=data.index,
+ orientation="h",
+ marker=dict(
+ color=f"rgba({BasePlot._fig.get_elem(ngram)[4:-1]}, 0.2)",
+ line=dict(width=2, color=BasePlot._fig.get_elem(ngram)),
+ ),
+ hovertemplate="%{x}",
+ name=f"Total {ngram}: {len(series)}",
+ legendgroup=ngram,
+ showlegend=BasePlot._fig.showlegend(ngram, legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Counts",
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 400 + show * 50),
+ plotname="plot_ngrams",
+ filename=filename,
+ display=display,
+ )
+
+ @crash
+ def plot_qq(
+ self,
+ columns: SLICE = 0,
+ distributions: str | SEQUENCE = "norm",
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower right",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot a quantile-quantile plot.
+
+ Columns are distinguished by color and the distributions are
+ distinguished by marker type. Missing values are ignored.
+
+ Parameters
+ ----------
+ columns: int, str, slice or sequence, default=0
+ Columns to plot. Selected categorical columns are ignored.
+
+ distributions: str or sequence, default="norm"
+ Names of the `scipy.stats` distributions to fit to the
+ columns.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:DataPlot.plot_correlation
+ atom.plots:DataPlot.plot_distribution
+ atom.plots:DataPlot.plot_relationships
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.plot_qq(columns=[5, 6])
+ atom.plot_qq(columns=0, distributions=["norm", "invgauss", "triang"])
+ ```
+
+ """
+ columns = self.branch._get_columns(columns)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+
+ percentiles = np.linspace(0, 100, 101)
+ for col in columns:
+ # Drop missing values for compatibility with scipy.stats
+ missing = self.missing + [np.inf, -np.inf]
+ values = self.dataset[col].replace(missing, np.NaN).dropna()
+
+ for dist in lst(distributions):
+ stat = getattr(stats, dist)
+ params = stat.fit(values)
+ samples = stat.rvs(*params, size=101, random_state=self.random_state)
+
+ fig.add_trace(
+ self._draw_line(
+ x=np.percentile(samples, percentiles),
+ y=np.percentile(values, percentiles),
+ mode="markers",
+ parent=col,
+ child=dist,
+ legend=legend,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ self._draw_straight_line(y="diagonal", xaxis=xaxis, yaxis=yaxis)
+
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Theoretical quantiles",
+ ylabel="Observed quantiles",
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 600),
+ plotname="plot_qq",
+ filename=filename,
+ display=display,
+ )
+
+ @crash
+ def plot_relationships(
+ self,
+ columns: slice | SEQUENCE = (0, 1, 2),
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] = (900, 900),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot pairwise relationships in a dataset.
+
+ Creates a grid of axes such that each numerical column appears
+ once on the x-axes and once on the y-axes. The bottom triangle
+ contains scatter plots (max 250 random samples), the diagonal
+ plots contain column distributions, and the upper triangle
+ contains contour histograms for all samples in the columns.
+
+ Parameters
+ ----------
+ columns: slice or sequence, default=(0, 1, 2)
+ Columns to plot. Selected categorical columns are ignored.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple, default=(900, 900)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:DataPlot.plot_correlation
+ atom.plots:DataPlot.plot_distribution
+ atom.plots:DataPlot.plot_qq
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.plot_relationships(columns=[0, 4, 5])
+ ```
+
+ """
+ columns = self.branch._get_columns(columns, only_numerical=True)
+
+ # Use max 250 samples to not clutter the plot
+ sample = lambda col: self.dataset[col].sample(
+ n=min(len(self.dataset), 250), random_state=self.random_state
+ )
+
+ fig = self._get_figure()
+ color = BasePlot._fig.get_elem()
+ for i in range(len(columns)**2):
+ x, y = i // len(columns), i % len(columns)
+
+ # Calculate the distance between subplots
+ offset = divide(0.0125, (len(columns) - 1))
+
+ # Calculate the size of the subplot
+ size = (1 - ((offset * 2) * (len(columns) - 1))) / len(columns)
+
+ # Determine the position for the axes
+ x_pos = y * (size + 2 * offset)
+ y_pos = (len(columns) - x - 1) * (size + 2 * offset)
+
+ xaxis, yaxis = BasePlot._fig.get_axes(
+ x=(x_pos, rnd(x_pos + size)),
+ y=(y_pos, rnd(y_pos + size)),
+ coloraxis=dict(
+ colorscale=PALETTE.get(color, "Blues"),
+ cmin=0,
+ cmax=len(self.dataset),
+ showscale=False,
+ )
+ )
+
+ if x == y:
+ fig.add_trace(
+ go.Histogram(
+ x=self.dataset[columns[x]],
+ marker=dict(
+ color=f"rgba({color[4:-1]}, 0.2)",
+ line=dict(width=2, color=color),
+ ),
+ name=columns[x],
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+ elif x > y:
+ fig.add_trace(
+ go.Scatter(
+ x=sample(columns[y]),
+ y=sample(columns[x]),
+ mode="markers",
+ marker=dict(color=color),
+ hovertemplate="(%{x}, %{y})",
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+ elif y > x:
+ fig.add_trace(
+ go.Histogram2dContour(
+ x=self.dataset[columns[y]],
+ y=self.dataset[columns[x]],
+ coloraxis=f"coloraxis{xaxis[1:]}",
+ hovertemplate="x:%{x}<br>y:%{y}<br>z:%{z}",
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ if x < len(columns) - 1:
+ fig.update_layout({f"xaxis{xaxis[1:]}_showticklabels": False})
+ if y > 0:
+ fig.update_layout({f"yaxis{yaxis[1:]}_showticklabels": False})
+
+ self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel=columns[y] if x == len(columns) - 1 else None,
+ ylabel=columns[x] if y == 0 else None,
+ )
+
+ return self._plot(
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 900),
+ plotname="plot_relationships",
+ filename=filename,
+ display=display,
+ )
+
+ @crash
+ def plot_wordcloud(
+ self,
+ index: SLICE | None = None,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ **kwargs,
+ ) -> go.Figure | None:
+ """Plot a wordcloud from the corpus.
+
+ The text for the plot is extracted from the column named
+ `corpus`. If there is no column with that name, an exception
+ is raised.
+
+ Parameters
+ ----------
+ index: int, str, slice, sequence or None, default=None
+ Documents in the corpus to include in the wordcloud. If
+ None, it selects all documents in the dataset.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ **kwargs
+ Additional keyword arguments for the [Wordcloud][] object.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:DataPlot.plot_ngrams
+ atom.plots:PredictionPlot.plot_pipeline
+
+ Examples
+ --------
+ ```pycon
+ import numpy as np
+ from atom import ATOMClassifier
+ from sklearn.datasets import fetch_20newsgroups
+
+ X, y = fetch_20newsgroups(
+ return_X_y=True,
+ categories=["alt.atheism", "sci.med", "comp.windows.x"],
+ shuffle=True,
+ random_state=1,
+ )
+ X = np.array(X).reshape(-1, 1)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.textclean()
+ atom.textnormalize()
+ atom.plot_wordcloud()
+ ```
+
+ """
+
+ def get_text(column):
+ """Get the complete corpus as one long string."""
+ if isinstance(column.iat[0], str):
+ return " ".join(column)
+ else:
+ return " ".join([" ".join(row) for row in column])
+
+ check_dependency("wordcloud")
+ from wordcloud import WordCloud
+
+ corpus = get_corpus(self.X)
+ rows = self.dataset.loc[self.branch._get_rows(index, return_test=False)]
+
+ wordcloud = WordCloud(
+ width=figsize[0],
+ height=figsize[1],
+ background_color=kwargs.pop("background_color", "white"),
+ random_state=kwargs.pop("random_state", self.random_state),
+ **kwargs,
+ )
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+
+ fig.add_trace(
+ go.Image(
+ z=wordcloud.generate(get_text(rows[corpus])),
+ hoverinfo="skip",
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ fig.update_layout(
+ {
+ f"xaxis{xaxis[1:]}_showticklabels": False,
+ f"yaxis{yaxis[1:]}_showticklabels": False,
+ }
+ )
+
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 600),
+ plotname="plot_wordcloud",
+ filename=filename,
+ display=display,
+ )
diff --git a/atom/plots/featureselectionplot.py b/atom/plots/featureselectionplot.py
new file mode 100644
index 000000000..79f83e1f3
--- /dev/null
+++ b/atom/plots/featureselectionplot.py
@@ -0,0 +1,428 @@
+# -*- coding: utf-8 -*-
+
+"""
+Automated Tool for Optimized Modelling (ATOM)
+Author: Mavs
+Description: Module containing the FeatureSelectionPlot class.
+
+"""
+
+from __future__ import annotations
+
+import numpy as np
+import plotly.graph_objects as go
+from sklearn.utils.metaestimators import available_if
+from typeguard import typechecked
+
+from atom.plots.base import BasePlot
+from atom.utils.types import INT, LEGEND
+from atom.utils.utils import crash, has_attr
+
+
+@typechecked
+class FeatureSelectionPlot(BasePlot):
+ """Feature selection plots.
+
+ These plots are accessible from atom or from the FeatureSelector
+ class when the appropriate feature selection strategy is used.
+
+ """
+
+ @available_if(has_attr("pca"))
+ @crash
+ def plot_components(
+ self,
+ show: INT | None = None,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower right",
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the explained variance ratio per component.
+
+ Kept components are colored and discarded components are
+ transparent. This plot is available only when feature selection
+ was applied with strategy="pca".
+
+ Parameters
+ ----------
+ show: int or None, default=None
+ Number of components to show. None to show all.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the number of components shown.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:FeatureSelectionPlot.plot_pca
+ atom.plots:FeatureSelectionPlot.plot_rfecv
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.feature_selection("pca", n_features=5)
+ atom.plot_components(show=10)
+ ```
+
+ """
+ if show is None or show > self.pca.components_.shape[0]:
+ # Limit max features shown to avoid maximum figsize error
+ show = min(200, self.pca.components_.shape[0])
+ elif show < 1:
+ raise ValueError(
+ "Invalid value for the show parameter. "
+ f"Value should be >0, got {show}."
+ )
+
+ # Get the variance ratio per component
+ variance = np.array(self.pca.explained_variance_ratio_)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+
+ # Create color scheme: first normal and then fully transparent
+ color = BasePlot._fig.get_elem("components")
+ opacity = [0.2] * self.pca._comps + [0] * (len(variance) - self.pca._comps)
+
+ fig.add_trace(
+ go.Bar(
+ x=variance,
+ y=[f"pca{str(i)}" for i in range(len(variance))],
+ orientation="h",
+ marker=dict(
+ color=[f"rgba({color[4:-1]}, {o})" for o in opacity],
+ line=dict(width=2, color=color),
+ ),
+ hovertemplate="%{x}",
+ name=f"Variance retained: {variance[:self.pca._comps].sum():.3f}",
+ legendgroup="components",
+ showlegend=BasePlot._fig.showlegend("components", legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ fig.update_layout({f"yaxis{yaxis[1:]}": dict(categoryorder="total ascending")})
+
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Explained variance ratio",
+ ylim=(len(variance) - show - 0.5, len(variance) - 0.5),
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 400 + show * 50),
+ plotname="plot_components",
+ filename=filename,
+ display=display,
+ )
+
+ @available_if(has_attr("pca"))
+ @crash
+ def plot_pca(
+ self,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the explained variance ratio vs number of components.
+
+ If the underlying estimator is [PCA][] (for dense datasets),
+ all possible components are plotted. If the underlying estimator
+ is [TruncatedSVD][] (for sparse datasets), it only shows the
+ selected components. The star marks the number of components
+ selected by the user. This plot is available only when feature
+ selection was applied with strategy="pca".
+
+ Parameters
+ ----------
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:FeatureSelectionPlot.plot_components
+ atom.plots:FeatureSelectionPlot.plot_rfecv
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.feature_selection("pca", n_features=5)
+ atom.plot_pca()
+ ```
+
+ """
+ # Create star symbol at selected number of components
+ symbols = ["circle"] * self.pca.n_features_in_
+ symbols[self.pca._comps - 1] = "star"
+ sizes = [self.marker_size] * self.pca.n_features_in_
+ sizes[self.pca._comps - 1] = self.marker_size * 1.5
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+ fig.add_trace(
+ go.Scatter(
+ x=tuple(range(1, self.pca.n_features_in_ + 1)),
+ y=np.cumsum(self.pca.explained_variance_ratio_),
+ mode="lines+markers",
+ line=dict(width=self.line_width, color=BasePlot._fig.get_elem("pca")),
+ marker=dict(
+ symbol=symbols,
+ size=sizes,
+ line=dict(width=1, color="rgba(255, 255, 255, 0.9)"),
+ opacity=1,
+ ),
+ hovertemplate="%{y}",
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ fig.update_layout(
+ {
+ "hovermode": "x",
+ f"xaxis{xaxis[1:]}_showspikes": True,
+ f"yaxis{yaxis[1:]}_showspikes": True,
+ }
+ )
+
+ margin = self.pca.n_features_in_ / 30
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="First N principal components",
+ ylabel="Cumulative variance ratio",
+ xlim=(1 - margin, self.pca.n_features_in_ - 1 + margin),
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_pca",
+ filename=filename,
+ display=display,
+ )
+
+ @available_if(has_attr("rfecv"))
+ @crash
+ def plot_rfecv(
+ self,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the rfecv results.
+
+ Plot the scores obtained by the estimator fitted on every
+ subset of the dataset. Only available when feature selection
+ was applied with strategy="rfecv".
+
+ Parameters
+ ----------
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:FeatureSelectionPlot.plot_components
+ atom.plots:FeatureSelectionPlot.plot_pca
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.feature_selection("rfecv", solver="Tree")
+ atom.plot_rfecv()
+ ```
+
+ """
+ try: # Define the y-label for the plot
+ ylabel = self.rfecv.get_params()["scoring"].name
+ except AttributeError:
+ ylabel = "accuracy" if self.goal.startswith("class") else "r2"
+
+ x = range(self.rfecv.min_features_to_select, self.rfecv.n_features_in_ + 1)
+
+ # Create star symbol at selected number of features
+ sizes = [6] * len(x)
+ sizes[self.rfecv.n_features_ - self.rfecv.min_features_to_select] = 12
+ symbols = ["circle"] * len(x)
+ symbols[self.rfecv.n_features_ - self.rfecv.min_features_to_select] = "star"
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+
+ mean = self.rfecv.cv_results_["mean_test_score"]
+ std = self.rfecv.cv_results_["std_test_score"]
+
+ fig.add_trace(
+ go.Scatter(
+ x=list(x),
+ y=mean,
+ mode="lines+markers",
+ line=dict(width=self.line_width, color=BasePlot._fig.get_elem("rfecv")),
+ marker=dict(
+ symbol=symbols,
+ size=sizes,
+ line=dict(width=1, color="rgba(255, 255, 255, 0.9)"),
+ opacity=1,
+ ),
+ name=ylabel,
+ legendgroup="rfecv",
+ showlegend=BasePlot._fig.showlegend("rfecv", legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ # Add error bands
+ fig.add_traces(
+ [
+ go.Scatter(
+ x=tuple(x),
+ y=mean + std,
+ mode="lines",
+ line=dict(width=1, color=BasePlot._fig.get_elem("rfecv")),
+ hovertemplate="%{y}<extra>upper bound</extra>",
+ legendgroup="rfecv",
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ ),
+ go.Scatter(
+ x=tuple(x),
+ y=mean - std,
+ mode="lines",
+ line=dict(width=1, color=BasePlot._fig.get_elem("rfecv")),
+ fill="tonexty",
+ fillcolor=f"rgba{BasePlot._fig.get_elem('rfecv')[3:-1]}, 0.2)",
+ hovertemplate="%{y}<extra>lower bound</extra>",
+ legendgroup="rfecv",
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ ),
+ ]
+ )
+
+ fig.update_layout({"hovermode": "x unified"})
+
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ groupclick="togglegroup",
+ xlabel="Number of features",
+ ylabel=ylabel,
+ xlim=(min(x) - len(x) / 30, max(x) + len(x) / 30),
+ ylim=(min(mean) - 3 * max(std), max(mean) + 3 * max(std)),
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_rfecv",
+ filename=filename,
+ display=display,
+ )
diff --git a/atom/plots/hyperparametertuningplot.py b/atom/plots/hyperparametertuningplot.py
new file mode 100644
index 000000000..08f09893a
--- /dev/null
+++ b/atom/plots/hyperparametertuningplot.py
@@ -0,0 +1,1453 @@
+# -*- coding: utf-8 -*-
+
+"""
+Automated Tool for Optimized Modelling (ATOM)
+Author: Mavs
+Description: Module containing the HyperparameterTuningPlot class.
+
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+import numpy as np
+import plotly.graph_objects as go
+from optuna.importance import FanovaImportanceEvaluator
+from optuna.trial import TrialState
+from optuna.visualization._parallel_coordinate import (
+ _get_dims_from_info, _get_parallel_coordinate_info,
+)
+from optuna.visualization._terminator_improvement import _get_improvement_info
+from optuna.visualization._utils import _is_log_scale
+from sklearn.utils._bunch import Bunch
+from typeguard import typechecked
+
+from atom.plots.base import BasePlot
+from atom.utils.constants import PALETTE
+from atom.utils.types import INT, INT_TYPES, LEGEND, MODEL, SEQUENCE
+from atom.utils.utils import (
+ check_dependency, check_hyperparams, composed, crash, divide, it, lst,
+ plot_from_model, rnd,
+)
+
+
+@typechecked
+class HyperparameterTuningPlot(BasePlot):
+ """Hyperparameter tuning plots.
+
+ Plots that help interpret the model's study and corresponding
+ trials. These plots are accessible from the runners or from the
+ models. If called from a runner, the `models` parameter has to be
+ specified (if None, uses all models). If called from a model, that
+ model is used and the `models` parameter becomes unavailable.
+
+ """
+
+ @composed(crash, plot_from_model)
+ def plot_edf(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ metric: INT | str | SEQUENCE | None = None,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "upper left",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the Empirical Distribution Function of a study.
+
+ Use this plot to analyze and improve hyperparameter search
+ spaces. The EDF assumes that the value of the objective
+ function is in accordance with the uniform distribution over
+ the objective space. This plot is only available for models
+ that ran [hyperparameter tuning][].
+
+ !!! note
+ Only complete trials are considered when plotting the EDF.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models that used hyperparameter
+ tuning are selected.
+
+ metric: int, str, sequence or None, default=None
+ Metric to plot (only for multi-metric runs). If str, add `+`
+ between options to select more than one. If None, the metric
+ used to run the pipeline is selected.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="upper left"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:HyperparameterTuningPlot.plot_hyperparameters
+ atom.plots:HyperparameterTuningPlot.plot_trials
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from optuna.distributions import IntDistribution
+ from sklearn.datasets import make_classification
+
+ X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+
+ # Run three models with different search spaces
+ atom.run(
+ models="RF_1",
+ n_trials=10,
+ ht_params={"distributions": {"n_estimators": IntDistribution(6, 10)}},
+ )
+ atom.run(
+ models="RF_2",
+ n_trials=10,
+ ht_params={"distributions": {"n_estimators": IntDistribution(11, 15)}},
+ )
+ atom.run(
+ models="RF_3",
+ n_trials=10,
+ ht_params={"distributions": {"n_estimators": IntDistribution(16, 20)}},
+ )
+
+ atom.plot_edf()
+ ```
+
+ """
+ models = check_hyperparams(models, "plot_edf")
+ metric = self._get_metric(metric, max_one=False)
+
+ values = []
+ for m in models:
+ values.append([])
+ for met in metric:
+ values[-1].append(np.array([lst(row)[met] for row in m.trials["score"]]))
+
+ x_min = np.nanmin(np.array(values))
+ x_max = np.nanmax(np.array(values))
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+ for m, val in zip(models, values):
+ for met in metric:
+ fig.add_trace(
+ self._draw_line(
+ x=(x := np.linspace(x_min, x_max, 100)),
+ y=np.sum(val[met][:, np.newaxis] <= x, axis=0) / len(val[met]),
+ parent=m.name,
+ child=self._metric[met].name,
+ legend=legend,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ ylim=(0, 1),
+ xlabel="Score",
+ ylabel="Cumulative Probability",
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_edf",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model)
+ def plot_hyperparameter_importance(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ metric: int | str = 0,
+ show: INT | None = None,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot a model's hyperparameter importance.
+
+ The hyperparameter importance are calculated using the
+ [fANOVA][] importance evaluator. The sum of importances for all
+ parameters (per model) is 1. This plot is only available for
+ models that ran [hyperparameter tuning][].
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models that used hyperparameter
+ tuning are selected.
+
+ metric: int or str, default=0
+ Metric to plot (only for multi-metric runs).
+
+ show: int or None, default=None
+ Number of hyperparameters (ordered by importance) to show.
+ None to show all.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the number of hyperparameters shown.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_feature_importance
+ atom.plots:HyperparameterTuningPlot.plot_hyperparameters
+ atom.plots:HyperparameterTuningPlot.plot_trials
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(["ET", "RF"], n_trials=10)
+ atom.plot_hyperparameter_importance()
+ ```
+
+ """
+ models = check_hyperparams(models, "plot_hyperparameter_importance")
+ params = len(set([k for m in lst(models) for k in m._ht["distributions"]]))
+ met = self._get_metric(metric, max_one=True)
+
+ if show is None or show > params:
+ # Limit max features shown to avoid maximum figsize error
+ show = min(200, params)
+ elif show < 1:
+ raise ValueError(
+ f"Invalid value for the show parameter. Value should be >0, got {show}."
+ )
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+ for m in models:
+ importances = FanovaImportanceEvaluator(seed=self.random_state).evaluate(
+ study=m.study,
+ target=None if len(self._metric) == 1 else lambda x: x.values[met],
+ )
+
+ fig.add_trace(
+ go.Bar(
+ x=np.array(list(importances.values())) / sum(importances.values()),
+ y=list(importances.keys()),
+ orientation="h",
+ marker=dict(
+ color=f"rgba({BasePlot._fig.get_elem(m.name)[4:-1]}, 0.2)",
+ line=dict(width=2, color=BasePlot._fig.get_elem(m.name)),
+ ),
+ hovertemplate="%{x}",
+ name=m.name,
+ legendgroup=m.name,
+ showlegend=BasePlot._fig.showlegend(m.name, legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ fig.update_layout(
+ {
+ f"yaxis{yaxis[1:]}": dict(categoryorder="total ascending"),
+ "bargroupgap": 0.05,
+ }
+ )
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Normalized hyperparameter importance",
+ ylim=(params - show - 0.5, params - 0.5),
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 400 + show * 50),
+ plotname="plot_hyperparameter_importance",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model(max_one=True))
+ def plot_hyperparameters(
+ self,
+ models: INT | str | MODEL | None = None,
+ params: str | slice | SEQUENCE = (0, 1),
+ metric: int | str = 0,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot hyperparameter relationships in a study.
+
+ A model's hyperparameters are plotted against each other. The
+ corresponding metric scores are displayed in a contour plot.
+ The markers are the trials in the study. This plot is only
+ available for models that ran [hyperparameter tuning][].
+
+ Parameters
+ ----------
+ models: int, str, Model or None, default=None
+ Model to plot. If None, all models are selected. Note that
+ leaving the default option could raise an exception if there
+ are multiple models. To avoid this, call the plot directly
+ from a model, e.g. `atom.lr.plot_hyperparameters()`.
+
+ params: str, slice or sequence, default=(0, 1)
+ Hyperparameters to plot. Use a sequence or add `+` between
+ options to select more than one.
+
+ metric: int or str, default=0
+ Metric to plot (only for multi-metric runs).
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the number of hyperparameters shown.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:HyperparameterTuningPlot.plot_hyperparameter_importance
+ atom.plots:HyperparameterTuningPlot.plot_parallel_coordinate
+ atom.plots:HyperparameterTuningPlot.plot_trials
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run("LR", n_trials=15)
+ atom.plot_hyperparameters(params=(0, 1, 2))
+ ```
+
+ """
+ m = check_hyperparams(models, "plot_hyperparameters")[0]
+
+ if len(params := self._get_hyperparams(params, models)) < 2:
+ raise ValueError(
+ "Invalid value for the hyperparameters parameter. A minimum "
+ f"of two parameters is required, got {len(params)}."
+ )
+
+ met = self._get_metric(metric, max_one=True)
+
+ fig = self._get_figure()
+ for i in range((length := len(params) - 1) ** 2):
+ x, y = i // length, i % length
+
+ if y <= x:
+ # Calculate the size of the subplot
+ size = 1 / length
+
+ # Determine the position for the axes
+ x_pos = y * size
+ y_pos = (length - x - 1) * size
+
+ xaxis, yaxis = BasePlot._fig.get_axes(
+ x=(x_pos, rnd(x_pos + size)),
+ y=(y_pos, rnd(y_pos + size)),
+ coloraxis=dict(
+ axes="99",
+ colorscale=PALETTE.get(BasePlot._fig.get_elem(m.name), "Blues"),
+ cmin=np.nanmin(
+ m.trials.apply(lambda x: lst(x["score"])[met], axis=1)
+ ),
+ cmax=np.nanmax(
+ m.trials.apply(lambda x: lst(x["score"])[met], axis=1)
+ ),
+ showscale=False,
+ )
+ )
+
+ x_values = lambda row: row["params"].get(params[y], None)
+ y_values = lambda row: row["params"].get(params[x + 1], None)
+
+ fig.add_trace(
+ go.Scatter(
+ x=m.trials.apply(x_values, axis=1),
+ y=m.trials.apply(y_values, axis=1),
+ mode="markers",
+ marker=dict(
+ size=self.marker_size,
+ color=BasePlot._fig.get_elem(m.name),
+ line=dict(width=1, color="rgba(255, 255, 255, 0.9)"),
+ ),
+ customdata=list(
+ zip(
+ m.trials.index.tolist(),
+ m.trials.apply(lambda x: lst(x["score"])[met], axis=1),
+ )
+ ),
+ hovertemplate=(
+ f"{params[y]}:%{{x}}<br>"
+ f"{params[x + 1]}:%{{y}}<br>"
+ f"{self._metric[met].name}:%{{customdata[1]:.4f}}"
+ "<extra>Trial %{customdata[0]}</extra>"
+ ),
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ fig.add_trace(
+ go.Contour(
+ x=m.trials.apply(x_values, axis=1),
+ y=m.trials.apply(y_values, axis=1),
+ z=m.trials.apply(lambda i: lst(i["score"])[met], axis=1),
+ contours=dict(
+ showlabels=True,
+ labelfont=dict(size=self.tick_fontsize, color="white")
+ ),
+ coloraxis="coloraxis99",
+ hoverinfo="skip",
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ if _is_log_scale(m.study.trials, params[y]):
+ fig.update_layout({f"xaxis{xaxis[1:]}_type": "log"})
+ if _is_log_scale(m.study.trials, params[x + 1]):
+ fig.update_layout({f"yaxis{xaxis[1:]}_type": "log"})
+
+ if x < length - 1:
+ fig.update_layout({f"xaxis{xaxis[1:]}_showticklabels": False})
+ if y > 0:
+ fig.update_layout({f"yaxis{yaxis[1:]}_showticklabels": False})
+
+ fig.update_layout(
+ {
+ "template": "plotly_white",
+ f"xaxis{xaxis[1:]}_showgrid": False,
+ f"yaxis{yaxis[1:]}_showgrid": False,
+ f"xaxis{yaxis[1:]}_zeroline": False,
+ f"yaxis{yaxis[1:]}_zeroline": False,
+ }
+ )
+
+ self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel=params[y] if x == length - 1 else None,
+ ylabel=params[x + 1] if y == 0 else None,
+ )
+
+ BasePlot._fig.used_models.append(m)
+ return self._plot(
+ title=title,
+ legend=legend,
+ figsize=figsize or (800 + 100 * length, 500 + 100 * length),
+ plotname="plot_hyperparameters",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model(max_one=True))
+ def plot_parallel_coordinate(
+ self,
+ models: INT | str | MODEL | None = None,
+ params: str | slice | SEQUENCE | None = None,
+ metric: INT | str = 0,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot high-dimensional parameter relationships in a study.
+
+ Every line of the plot represents one trial. This plot is only
+ available for models that ran [hyperparameter tuning][].
+
+ Parameters
+ ----------
+ models: int, str, Model or None, default=None
+ Model to plot. If None, all models are selected. Note that
+ leaving the default option could raise an exception if there
+ are multiple models. To avoid this, call the plot directly
+ from a model, e.g. `atom.lr.plot_parallel_coordinate()`.
+
+ params: str, slice, sequence or None, default=None
+ Hyperparameters to plot. Use a sequence or add `+` between
+ options to select more than one. If None, all the model's
+ hyperparameters are selected.
+
+ metric: int or str, default=0
+ Metric to plot (only for multi-metric runs).
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the number of hyperparameters shown.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:HyperparameterTuningPlot.plot_edf
+ atom.plots:HyperparameterTuningPlot.plot_hyperparameter_importance
+ atom.plots:HyperparameterTuningPlot.plot_hyperparameters
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run("RF", n_trials=15)
+ atom.plot_parallel_coordinate(params=slice(1, 5))
+ ```
+
+ """
+
+ def sort_mixed_types(values: list[str]) -> list[str]:
+ """Sort a sequence of numbers and strings.
+
+ Numbers are converted and take precedence over strings.
+
+ Parameters
+ ----------
+ values: list
+ Values to sort.
+
+ Returns
+ -------
+ list of str
+ Sorted values.
+
+ """
+ numbers, categorical = [], []
+ for elem in values:
+ try:
+ numbers.append(it(float(elem)))
+ except (TypeError, ValueError):
+ categorical.append(str(elem))
+
+ return list(map(str, sorted(numbers))) + sorted(categorical)
+
+ m = check_hyperparams(models, "plot_parallel_coordinate")[0]
+ params = self._get_hyperparams(params, models)
+ met = self._get_metric(metric, max_one=True)
+
+ dims = _get_dims_from_info(
+ _get_parallel_coordinate_info(
+ study=m.study,
+ params=params,
+ target=None if len(self._metric) == 1 else lambda x: x.values[met],
+ target_name=self._metric[met].name,
+ )
+ )
+
+ # Clean up dimensions for nicer view
+ for d in [dims[0]] + sorted(dims[1:], key=lambda x: params.index(x["label"])):
+ if "ticktext" in d:
+ # Skip processing for logarithmic params
+ if all(isinstance(i, INT_TYPES) for i in d["values"]):
+ # Order categorical values
+ mapping = [d["ticktext"][i] for i in d["values"]]
+ d["ticktext"] = sort_mixed_types(d["ticktext"])
+ d["values"] = [d["ticktext"].index(v) for v in mapping]
+ else:
+ # Round numerical values
+ d["tickvals"] = list(
+ map(rnd, np.linspace(min(d["values"]), max(d["values"]), 5))
+ )
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes(
+ coloraxis=dict(
+ colorscale=PALETTE.get(BasePlot._fig.get_elem(m.name), "Blues"),
+ cmin=min(dims[0]["values"]),
+ cmax=max(dims[0]["values"]),
+ title=self._metric[met].name,
+ font_size=self.label_fontsize,
+ )
+ )
+
+ fig.add_trace(
+ go.Parcoords(
+ dimensions=dims,
+ line=dict(
+ color=dims[0]["values"],
+ coloraxis=f"coloraxis{xaxis[1:]}",
+ ),
+ unselected=dict(line=dict(color="gray", opacity=0.5)),
+ labelside="bottom",
+ labelfont=dict(size=self.label_fontsize),
+ )
+ )
+
+ BasePlot._fig.used_models.append(m)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ title=title,
+ legend=legend,
+ figsize=figsize or (700 + len(params) * 50, 600),
+ plotname="plot_parallel_coordinate",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model(max_one=True))
+ def plot_pareto_front(
+ self,
+ models: INT | str | MODEL | None = None,
+ metric: str | SEQUENCE | None = None,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the Pareto front of a study.
+
+ Shows the trial scores plotted against each other. The marker's
+ colors indicate the trial number. This plot is only available
+ for models that ran [multi-metric runs][] with
+ [hyperparameter tuning][].
+
+ Parameters
+ ----------
+ models: int, str, Model or None, default=None
+ Model to plot. If None, all models are selected. Note that
+ leaving the default option could raise an exception if there
+ are multiple models. To avoid this, call the plot directly
+ from a model, e.g. `atom.lr.plot_pareto_front()`.
+
+ metric: str, sequence or None, default=None
+ Metrics to plot. Use a sequence or add `+` between options
+ to select more than one. If None, the metrics used to run
+ the pipeline are selected.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the number of metrics shown.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:HyperparameterTuningPlot.plot_edf
+ atom.plots:HyperparameterTuningPlot.plot_slice
+ atom.plots:HyperparameterTuningPlot.plot_trials
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(
+ models="RF",
+ metric=["f1", "accuracy", "recall"],
+ n_trials=15,
+ )
+ atom.plot_pareto_front()
+ ```
+
+ """
+ m = check_hyperparams(models, "plot_pareto_front")[0]
+
+ if len(metric := self._get_metric(metric, max_one=False)) < 2:
+ raise ValueError(
+ "Invalid value for the metric parameter. A minimum "
+ f"of two metrics are required, got {len(metric)}."
+ )
+
+ fig = self._get_figure()
+ for i in range((length := len(metric) - 1) ** 2):
+ x, y = i // length, i % length
+
+ if y <= x:
+ # Calculate the distance between subplots
+ offset = divide(0.0125, length - 1)
+
+ # Calculate the size of the subplot
+ size = (1 - ((offset * 2) * (length - 1))) / length
+
+ # Determine the position for the axes
+ x_pos = y * (size + 2 * offset)
+ y_pos = (length - x - 1) * (size + 2 * offset)
+
+ xaxis, yaxis = BasePlot._fig.get_axes(
+ x=(x_pos, rnd(x_pos + size)),
+ y=(y_pos, rnd(y_pos + size)),
+ )
+
+ fig.add_trace(
+ go.Scatter(
+ x=m.trials.apply(lambda row: row["score"][y], axis=1),
+ y=m.trials.apply(lambda row: row["score"][x + 1], axis=1),
+ mode="markers",
+ marker=dict(
+ size=self.marker_size,
+ color=m.trials.index,
+ colorscale="Teal",
+ line=dict(width=1, color="rgba(255, 255, 255, 0.9)"),
+ ),
+ customdata=m.trials.index,
+ hovertemplate="(%{x}, %{y})Trial %{customdata}",
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ if x < len(metric) - 1:
+ fig.update_layout({f"xaxis{xaxis[1:]}_showticklabels": False})
+ if y > 0:
+ fig.update_layout({f"yaxis{yaxis[1:]}_showticklabels": False})
+
+ self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel=self._metric[y].name if x == length - 1 else None,
+ ylabel=self._metric[x + 1].name if y == 0 else None,
+ )
+
+ BasePlot._fig.used_models.append(m)
+ return self._plot(
+ title=title,
+ legend=legend,
+ figsize=figsize or (500 + 100 * length, 500 + 100 * length),
+ plotname="plot_pareto_front",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model(max_one=True))
+ def plot_slice(
+ self,
+ models: INT | str | MODEL | None = None,
+ params: str | slice | SEQUENCE | None = None,
+ metric: INT | str | SEQUENCE | None = None,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the parameter relationship in a study.
+
+ The color of the markers indicate the trial. This plot is only
+ available for models that ran [hyperparameter tuning][].
+
+ Parameters
+ ----------
+ models: int, str, Model or None, default=None
+ Model to plot. If None, all models are selected. Note that
+ leaving the default option could raise an exception if there
+ are multiple models. To avoid this, call the plot directly
+ from a model, e.g. `atom.lr.plot_slice()`.
+
+ params: str, slice, sequence or None, default=None
+ Hyperparameters to plot. Use a sequence or add `+` between
+ options to select more than one. If None, all the model's
+ hyperparameters are selected.
+
+ metric: int or str, default=None
+ Metric to plot (only for multi-metric runs). If str, add `+`
+ between options to select more than one. If None, the metric
+ used to run the pipeline is selected.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the number of hyperparameters shown.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:HyperparameterTuningPlot.plot_edf
+ atom.plots:HyperparameterTuningPlot.plot_hyperparameters
+ atom.plots:HyperparameterTuningPlot.plot_parallel_coordinate
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(
+ models="RF",
+ metric=["f1", "recall"],
+ n_trials=15,
+ )
+ atom.plot_slice(params=(0, 1, 2))
+ ```
+
+ """
+ m = check_hyperparams(models, "plot_slice")[0]
+ params = self._get_hyperparams(params, models)
+ metric = self._get_metric(metric, max_one=False)
+
+ fig = self._get_figure()
+ for i in range(len(params) * len(metric)):
+ x, y = i // len(params), i % len(params)
+
+ # Calculate the distance between subplots
+ x_offset = divide(0.0125, (len(params) - 1))
+ y_offset = divide(0.0125, (len(metric) - 1))
+
+ # Calculate the size of the subplot
+ x_size = (1 - ((x_offset * 2) * (len(params) - 1))) / len(params)
+ y_size = (1 - ((y_offset * 2) * (len(metric) - 1))) / len(metric)
+
+ # Determine the position for the axes
+ x_pos = y * (x_size + 2 * x_offset)
+ y_pos = (len(metric) - x - 1) * (y_size + 2 * y_offset)
+
+ xaxis, yaxis = BasePlot._fig.get_axes(
+ x=(x_pos, rnd(x_pos + x_size)),
+ y=(y_pos, rnd(y_pos + y_size)),
+ )
+
+ fig.add_trace(
+ go.Scatter(
+ x=m.trials.apply(lambda r: r["params"].get(params[y], None), axis=1),
+ y=m.trials.apply(lambda r: lst(r["score"])[x], axis=1),
+ mode="markers",
+ marker=dict(
+ size=self.marker_size,
+ color=m.trials.index,
+ colorscale="Teal",
+ line=dict(width=1, color="rgba(255, 255, 255, 0.9)"),
+ ),
+ customdata=m.trials.index,
+ hovertemplate="(%{x}, %{y})Trial %{customdata}",
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ if _is_log_scale(m.study.trials, params[y]):
+ fig.update_layout({f"xaxis{xaxis[1:]}_type": "log"})
+
+ if x < len(metric) - 1:
+ fig.update_layout({f"xaxis{xaxis[1:]}_showticklabels": False})
+ if y > 0:
+ fig.update_layout({f"yaxis{yaxis[1:]}_showticklabels": False})
+
+ self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel=params[y] if x == len(metric) - 1 else None,
+ ylabel=self._metric[x].name if y == 0 else None,
+ )
+
+ BasePlot._fig.used_models.append(m)
+ return self._plot(
+ title=title,
+ legend=legend,
+ figsize=figsize or (800 + 100 * len(params), 500 + 100 * len(metric)),
+ plotname="plot_slice",
+ filename=filename,
+ display=display,
+ )
+
+    @composed(crash, plot_from_model)
+    def plot_terminator_improvement(
+        self,
+        models: INT | str | MODEL | slice | SEQUENCE | None = None,
+        *,
+        title: str | dict | None = None,
+        legend: str | dict | None = "upper right",
+        figsize: tuple[INT, INT] = (900, 600),
+        filename: str | None = None,
+        display: bool | None = True,
+    ) -> go.Figure | None:
+        """Plot the potentials for future objective improvement.
+
+        This function visualizes the objective improvement potentials.
+        It helps to determine whether you should continue the
+        optimization or not. The evaluated error is also plotted. Note
+        that this function may take some time to compute the improvement
+        potentials. This plot is only available for models that ran
+        [hyperparameter tuning][].
+
+        !!! warning
+            * The plot_terminator_improvement method is only available
+              for models that ran [hyperparameter tuning][] using
+              cross-validation, e.g. using `ht_params={'cv': 5}`.
+            * This method can be slow. Results are cached to fasten
+              repeated calls.
+
+        Parameters
+        ----------
+        models: int, str, Model, slice, sequence or None, default=None
+            Models to plot. If None, all models that used hyperparameter
+            tuning are selected.
+
+        title: str, dict or None, default=None
+            Title for the plot.
+
+            - If None, no title is shown.
+            - If str, text for the title.
+            - If dict, [title configuration][parameters].
+
+        legend: str, dict or None, default="upper right",
+            Legend for the plot. See the [user guide][parameters] for
+            an extended description of the choices.
+
+            - If None: No legend is shown.
+            - If str: Location where to show the legend.
+            - If dict: Legend configuration.
+
+        figsize: tuple, default=(900, 600)
+            Figure's size in pixels, format as (x, y)
+
+        filename: str or None, default=None
+            Save the plot using this name. Use "auto" for automatic
+            naming. The type of the file depends on the provided name
+            (.html, .png, .pdf, etc...). If `filename` has no file type,
+            the plot is saved as html. If None, the plot is not saved.
+
+        display: bool or None, default=True
+            Whether to render the plot. If None, it returns the figure.
+
+        Returns
+        -------
+        [go.Figure][] or None
+            Plot object. Only returned if `display=None`.
+
+        See Also
+        --------
+        atom.plots:HyperparameterTuningPlot.plot_pareto_front
+        atom.plots:HyperparameterTuningPlot.plot_timeline
+        atom.plots:HyperparameterTuningPlot.plot_trials
+
+        Examples
+        --------
+        ```pycon
+        from atom import ATOMClassifier
+        from sklearn.datasets import make_classification
+
+        X, y = make_classification(n_samples=100, flip_y=0.2, random_state=1)
+
+        atom = ATOMClassifier(X, y, random_state=1)
+        atom.run("RF", n_trials=10, ht_params={"cv": 5})
+        atom.plot_terminator_improvement()
+        ```
+
+        """
+        # The improvement estimation requires the optional botorch package
+        check_dependency("botorch")
+
+        models = check_hyperparams(models, "plot_terminator_improvement")
+
+        fig = self._get_figure()
+        xaxis, yaxis = BasePlot._fig.get_axes()
+        for m in models:
+            if m._ht["cv"] > 1:
+                # Computing the improvement info is expensive, so the call is
+                # routed through the instance's memory cache to speed up
+                # repeated invocations with the same study
+                info = self._memory.cache(_get_improvement_info)(m.study, get_error=True)
+            else:
+                # Fail fast: without cross-validation there is no error
+                # estimate, so the improvement potential can't be computed.
+                # Note that this aborts the whole plot, even if other selected
+                # models did use cross-validation.
+                raise ValueError(
+                    "The plot_terminator_improvement method is only available for "
+                    "models that ran hyperparameter tuning using cross-validation, "
+                    "e.g. using ht_params={'cv': 5}."
+                )
+
+            fig.add_trace(
+                self._draw_line(
+                    x=m.trials.index,
+                    y=info.improvements,
+                    error_y=dict(type="data", array=info.errors),
+                    mode="markers+lines",
+                    parent=m.name,
+                    legend=legend,
+                    xaxis=xaxis,
+                    yaxis=yaxis,
+                )
+            )
+
+        BasePlot._fig.used_models.extend(models)
+        return self._plot(
+            ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+            xlabel="Trial",
+            ylabel="Terminator improvement",
+            title=title,
+            legend=legend,
+            figsize=figsize,
+            plotname="plot_terminator_improvement",
+            filename=filename,
+            display=display,
+        )
+
+ @composed(crash, plot_from_model)
+ def plot_timeline(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower right",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the timeline of a study.
+
+ This plot is only available for models that ran
+ [hyperparameter tuning][].
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models that used hyperparameter
+ tuning are selected.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower right",
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y)
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:HyperparameterTuningPlot.plot_edf
+ atom.plots:HyperparameterTuningPlot.plot_slice
+ atom.plots:HyperparameterTuningPlot.plot_terminator_improvement
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from optuna.pruners import PatientPruner
+ from sklearn.datasets import make_classification
+
+ X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(
+ models="LGB",
+ n_trials=15,
+ ht_params={"pruner": PatientPruner(None, patience=2)},
+ )
+ atom.plot_timeline()
+ ```
+
+ """
+ models = check_hyperparams(models, "plot_timeline")
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+
+ _cm = {
+ "COMPLETE": BasePlot._fig._palette[0], # Main color
+ "FAIL": "rgb(255, 0, 0)", # Red
+ "PRUNED": "rgb(255, 165, 0)", # Orange
+ "RUNNING": "rgb(124, 252, 0)", # Green
+ "WAITING": "rgb(220, 220, 220)", # Gray
+ }
+
+ for m in models:
+ info = []
+ for trial in m.study.get_trials(deepcopy=False):
+ date_complete = trial.datetime_complete or datetime.now()
+ date_start = trial.datetime_start or date_complete
+
+ # Create nice representation of scores and params for hover
+ s = [f'{m}: {trial.values[i]}' for i, m in enumerate(self._metric.keys())]
+ p = [f" --> {k}: {v}" for k, v in trial.params.items()]
+
+ info.append(
+ Bunch(
+ number=trial.number,
+ start=date_start,
+ duration=1000 * (date_complete - date_start).total_seconds(),
+ state=trial.state,
+ hovertext=(
+ f"Trial: {trial.number}
"
+ f"{'
'.join(s)}"
+ f"Parameters:
{'
'.join(p)}"
+ )
+ )
+ )
+
+ for state in sorted(TrialState, key=lambda x: x.name):
+ if bars := list(filter(lambda x: x.state == state, info)):
+ fig.add_trace(
+ go.Bar(
+ name=state.name,
+ x=[b.duration for b in bars],
+ y=[b.number for b in bars],
+ base=[b.start.isoformat() for b in bars],
+ text=[b.hovertext for b in bars],
+ textposition="none",
+ hovertemplate=f"%{{text}}{m.name}",
+ orientation="h",
+ marker=dict(
+ color=f"rgba({_cm[state.name][4:-1]}, 0.2)",
+ line=dict(width=2, color=_cm[state.name]),
+ ),
+ showlegend=BasePlot._fig.showlegend(_cm[state.name], legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ fig.update_layout({f"xaxis{yaxis[1:]}_type": "date", "barmode": "group"})
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Datetime",
+ ylabel="Trial",
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_timeline",
+ filename=filename,
+ display=display,
+ )
+
+    @composed(crash, plot_from_model)
+    def plot_trials(
+        self,
+        models: INT | str | MODEL | slice | SEQUENCE | None = None,
+        metric: INT | str | SEQUENCE | None = None,
+        *,
+        title: str | dict | None = None,
+        legend: str | dict | None = "upper left",
+        figsize: tuple[INT, INT] = (900, 800),
+        filename: str | None = None,
+        display: bool | None = True,
+    ) -> go.Figure | None:
+        """Plot the hyperparameter tuning trials.
+
+        Creates a figure with two plots: the first plot shows the score
+        of every trial and the second shows the distance between the
+        last consecutive steps. The best trial is indicated with a star.
+        This is the same plot as produced by `ht_params={"plot": True}`.
+        This plot is only available for models that ran
+        [hyperparameter tuning][].
+
+        Parameters
+        ----------
+        models: int, str, Model, slice, sequence or None, default=None
+            Models to plot. If None, all models that used hyperparameter
+            tuning are selected.
+
+        metric: int, str, sequence or None, default=None
+            Metric to plot (only for multi-metric runs). Add `+` between
+            options to select more than one. If None, all metrics are
+            selected.
+
+        title: str, dict or None, default=None
+            Title for the plot.
+
+            - If None, no title is shown.
+            - If str, text for the title.
+            - If dict, [title configuration][parameters].
+
+        legend: str, dict or None, default="upper left"
+            Legend for the plot. See the [user guide][parameters] for
+            an extended description of the choices.
+
+            - If None: No legend is shown.
+            - If str: Location where to show the legend.
+            - If dict: Legend configuration.
+
+        figsize: tuple, default=(900, 800)
+            Figure's size in pixels, format as (x, y).
+
+        filename: str or None, default=None
+            Save the plot using this name. Use "auto" for automatic
+            naming. The type of the file depends on the provided name
+            (.html, .png, .pdf, etc...). If `filename` has no file type,
+            the plot is saved as html. If None, the plot is not saved.
+
+        display: bool or None, default=True
+            Whether to render the plot. If None, it returns the figure.
+
+        Returns
+        -------
+        [go.Figure][] or None
+            Plot object. Only returned if `display=None`.
+
+        See Also
+        --------
+        atom.plots:PredictionPlot.plot_evals
+        atom.plots:HyperparameterTuningPlot.plot_hyperparameters
+        atom.plots:PredictionPlot.plot_results
+
+        Examples
+        --------
+        ```pycon
+        from atom import ATOMClassifier
+        from sklearn.datasets import make_classification
+
+        X, y = make_classification(n_samples=100, flip_y=0.2, random_state=1)
+
+        atom = ATOMClassifier(X, y, random_state=1)
+        atom.run(["ET", "RF"], n_trials=15)
+        atom.plot_trials()
+        ```
+
+        """
+        models = check_hyperparams(models, "plot_trials")
+        metric = self._get_metric(metric, max_one=False)
+
+        fig = self._get_figure()
+        # Two stacked subplots: scores on top, step distances at the bottom
+        xaxis, yaxis = BasePlot._fig.get_axes(y=(0.31, 1.0))
+        xaxis2, yaxis2 = BasePlot._fig.get_axes(y=(0.0, 0.29))
+        for m in models:
+            for met in metric:
+                y = m.trials["score"].apply(lambda value: lst(value)[met])
+
+                # Create star symbol at best trial
+                symbols = ["circle"] * len(y)
+                symbols[m.best_trial.number] = "star"
+                sizes = [self.marker_size] * len(y)
+                sizes[m.best_trial.number] = self.marker_size * 1.5
+
+                fig.add_trace(
+                    self._draw_line(
+                        x=list(range(len(y))),
+                        y=y,
+                        mode="lines+markers",
+                        marker_symbol=symbols,
+                        marker_size=sizes,
+                        hovertemplate=None,
+                        parent=m.name,
+                        child=self._metric[met].name,
+                        legend=legend,
+                        # Both traces are drawn on the bottom x-axis; the top
+                        # y-axis is re-anchored to it in the layout update below
+                        # so the two subplots share one x-axis
+                        xaxis=xaxis2,
+                        yaxis=yaxis,
+                    )
+                )
+
+                fig.add_trace(
+                    self._draw_line(
+                        # Distances start at trial 1 (diff consumes one element)
+                        x=list(range(1, len(y))),
+                        y=np.abs(np.diff(y)),
+                        mode="lines+markers",
+                        marker_symbol="circle",
+                        parent=m.name,
+                        child=self._metric[met].name,
+                        legend=legend,
+                        xaxis=xaxis2,
+                        yaxis=yaxis2,
+                    )
+                )
+
+        fig.update_layout(
+            {
+                # Anchor the top y-axis to the shared (bottom) x-axis and hide
+                # the unused top x-axis labels
+                f"yaxis{yaxis[1:]}_anchor": f"x{xaxis2[1:]}",
+                f"xaxis{xaxis[1:]}_showticklabels": False,
+                "hovermode": "x unified",
+            },
+        )
+
+        self._plot(
+            ax=(f"xaxis{xaxis2[1:]}", f"yaxis{yaxis2[1:]}"),
+            xlabel="Trial",
+            ylabel="d",
+        )
+
+        BasePlot._fig.used_models.extend(models)
+        return self._plot(
+            ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+            groupclick="togglegroup",
+            ylabel="Score",
+            title=title,
+            legend=legend,
+            figsize=figsize,
+            plotname="plot_trials",
+            filename=filename,
+            display=display,
+        )
diff --git a/atom/plots/predictionplot.py b/atom/plots/predictionplot.py
new file mode 100644
index 000000000..22ef8a691
--- /dev/null
+++ b/atom/plots/predictionplot.py
@@ -0,0 +1,3546 @@
+# -*- coding: utf-8 -*-
+
+"""
+Automated Tool for Optimized Modelling (ATOM)
+Author: Mavs
+Description: Module containing the PredictionPlot class.
+
+"""
+
+from __future__ import annotations
+
+from collections import defaultdict
+from functools import reduce
+from itertools import chain
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+from joblib import Parallel, delayed
+from plotly.colors import unconvert_from_RGB_255, unlabel_rgb
+from scipy import stats
+from scipy.stats.mstats import mquantiles
+from sklearn.calibration import calibration_curve
+from sklearn.inspection import partial_dependence, permutation_importance
+from sklearn.metrics import (
+ confusion_matrix, det_curve, precision_recall_curve, roc_curve,
+)
+from sklearn.utils import _safe_indexing
+from sklearn.utils.metaestimators import available_if
+from sktime.forecasting.base import ForecastingHorizon
+from typeguard import typechecked
+
+from atom.plots.base import BasePlot
+from atom.utils.constants import PALETTE
+from atom.utils.types import (
+ FEATURES, FLOAT, INT, LEGEND, METRIC_SELECTOR, MODEL, SCALAR, SEQUENCE,
+ SLICE,
+)
+from atom.utils.utils import (
+ bk, check_canvas, check_dependency, check_predict_proba, composed, crash,
+ divide, get_best_score, get_custom_scorer, has_task, is_binary,
+ is_multioutput, lst, plot_from_model, rnd,
+)
+
+
+@typechecked
+class PredictionPlot(BasePlot):
+ """Prediction plots.
+
+ Plots that use the model's predictions. These plots are accessible
+ from the runners or from the models. If called from a runner, the
+ `models` parameter has to be specified (if None, uses all models).
+ If called from a model, that model is used and the `models` parameter
+ becomes unavailable.
+
+ """
+
+ @available_if(has_task(["binary", "multilabel"]))
+ @composed(crash, plot_from_model)
+ def plot_calibration(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ dataset: str | SEQUENCE = "test",
+ n_bins: INT = 10,
+ target: INT | str = 0,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = "upper left",
+ figsize: tuple[INT, INT] = (900, 900),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the calibration curve for a binary classifier.
+
+ Well calibrated classifiers are probabilistic classifiers for
+ which the output of the `predict_proba` method can be directly
+ interpreted as a confidence level. For instance a well
+ calibrated (binary) classifier should classify the samples such
+ that among the samples to which it gave a `predict_proba` value
+ close to 0.8, approx. 80% actually belong to the positive class.
+ Read more in sklearn's [documentation][calibration].
+
+ This figure shows two plots: the calibration curve, where the
+ x-axis represents the average predicted probability in each bin
+ and the y-axis is the fraction of positives, i.e. the proportion
+ of samples whose class is the positive class (in each bin); and
+ a distribution of all predicted probabilities of the classifier.
+ This plot is available only for models with a `predict_proba`
+ method in a binary or [multilabel][] classification task.
+
+ !!! tip
+ Use the [calibrate][adaboost-calibrate] method to calibrate
+ the winning model.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ dataset: str or sequence, default="test"
+ Data set on which to calculate the metric. Use a sequence
+ or add `+` between options to select more than one. Choose
+ from: "train", "test" or "holdout".
+
+ target: int or str, default=0
+ Target column to look at. Only for [multilabel][] tasks.
+
+ n_bins: int, default=10
+ Number of bins used for calibration. Minimum of 5 required.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="upper left"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 900)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_lift
+ atom.plots:PredictionPlot.plot_prc
+ atom.plots:PredictionPlot.plot_roc
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import make_classification
+
+ X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(["RF", "LGB"])
+ atom.plot_calibration()
+ ```
+
+ """
+ check_predict_proba(models, "plot_calibration")
+ dataset = self._get_set(dataset, max_one=False)
+ target = self.branch._get_target(target, only_columns=True)
+
+ if n_bins < 5:
+ raise ValueError(
+ "Invalid value for the n_bins parameter."
+ f"Value should be >=5, got {n_bins}."
+ )
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes(y=(0.31, 1.0))
+ xaxis2, yaxis2 = BasePlot._fig.get_axes(y=(0.0, 0.29))
+ for m in models:
+ for ds in dataset:
+ y_true, y_pred = m._get_pred(ds, target, attr="predict_proba")
+
+ # Get calibration (frac of positives and predicted values)
+ frac_pos, pred = calibration_curve(y_true, y_pred, n_bins=n_bins)
+
+ fig.add_trace(
+ self._draw_line(
+ x=pred,
+ y=frac_pos,
+ parent=m.name,
+ child=ds,
+ mode="lines+markers",
+ marker_symbol="circle",
+ legend=legend,
+ xaxis=xaxis2,
+ yaxis=yaxis,
+ )
+ )
+
+ fig.add_trace(
+ go.Histogram(
+ x=y_pred,
+ xbins=dict(start=0, end=1, size=1. / n_bins),
+ marker=dict(
+ color=f"rgba({BasePlot._fig.get_elem(m.name)[4:-1]}, 0.2)",
+ line=dict(width=2, color=BasePlot._fig.get_elem(m.name)),
+ ),
+ name=m.name,
+ legendgroup=m.name,
+ showlegend=False,
+ xaxis=xaxis2,
+ yaxis=yaxis2,
+ )
+ )
+
+ self._draw_straight_line(y="diagonal", xaxis=xaxis2, yaxis=yaxis)
+
+ fig.update_layout(
+ {
+ f"yaxis{yaxis[1:]}_anchor": f"x{xaxis2[1:]}",
+ f"xaxis{xaxis2[1:]}_showgrid": True,
+ "barmode": "overlay",
+ }
+ )
+
+ self._plot(
+ ax=(f"xaxis{xaxis2[1:]}", f"yaxis{yaxis2[1:]}"),
+ xlabel="Predicted value",
+ ylabel="Count",
+ xlim=(0, 1),
+ )
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ groupclick="togglegroup",
+ ylabel="Fraction of positives",
+ ylim=(-0.05, 1.05),
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_calibration",
+ filename=filename,
+ display=display,
+ )
+
+ @available_if(has_task("class"))
+ @composed(crash, plot_from_model)
+ def plot_confusion_matrix(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ dataset: str = "test",
+ target: INT | str = 0,
+ threshold: FLOAT = 0.5,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "upper right",
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot a model's confusion matrix.
+
+ For one model, the plot shows a heatmap. For multiple models,
+ it compares TP, FP, FN and TN in a barplot (not implemented
+ for multiclass classification tasks). This plot is available
+ only for classification tasks.
+
+ !!! tip
+ Fill the `threshold` parameter with the result from the
+ model's `get_best_threshold` method to optimize the results.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ dataset: str, default="test"
+ Data set on which to calculate the confusion matrix. Choose
+ from:` "train", "test" or "holdout".
+
+ target: int or str, default=0
+ Target column to look at. Only for [multioutput tasks][].
+
+ threshold: float, default=0.5
+ Threshold between 0 and 1 to convert predicted probabilities
+ to class labels. Only for binary classification tasks.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="upper right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the plot's type.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_calibration
+ atom.plots:PredictionPlot.plot_threshold
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import make_classification
+
+ X, y = make_classification(n_samples=100, flip_y=0.2, random_state=1)
+
+ atom = ATOMClassifier(X, y, test_size=0.4)
+ atom.run(["LR", "RF"])
+ atom.lr.plot_confusion_matrix() # For one model
+ atom.plot_confusion_matrix() # For multiple models
+ ```
+
+ """
+ ds = self._get_set(dataset, max_one=True)
+ target = self.branch._get_target(target, only_columns=True)
+
+ if self.task.startswith("multiclass") and len(models) > 1:
+ raise NotImplementedError(
+ "The plot_confusion_matrix method does not support "
+ "the comparison of multiple models for multiclass "
+ "or multiclass-multioutput classification tasks."
+ )
+
+ labels = np.array(
+ (("True negatives", "False positives"), ("False negatives", "True positives"))
+ )
+
+ fig = self._get_figure()
+ if len(models) == 1:
+ xaxis, yaxis = BasePlot._fig.get_axes(
+ x=(0, 0.87),
+ coloraxis=dict(
+ colorscale="Blues",
+ cmin=0,
+ cmax=100,
+ title="Percentage of samples",
+ font_size=self.label_fontsize,
+ ),
+ )
+ else:
+ xaxis, yaxis = BasePlot._fig.get_axes()
+
+ for m in models:
+ y_true, y_pred = m._get_pred(ds, target, attr="predict")
+ if threshold != 0.5:
+ y_pred = (y_pred > threshold).astype("int")
+
+ cm = confusion_matrix(y_true, y_pred)
+ if len(models) == 1: # Create matrix heatmap
+ ticks = m.mapping.get(target, np.unique(m.dataset[target]).astype(str))
+ xaxis, yaxis = BasePlot._fig.get_axes(
+ x=(0, 0.87),
+ coloraxis=dict(
+ colorscale="Blues",
+ cmin=0,
+ cmax=100,
+ title="Percentage of samples",
+ font_size=self.label_fontsize,
+ ),
+ )
+
+ fig.add_trace(
+ go.Heatmap(
+ x=ticks,
+ y=ticks,
+ z=100. * cm / cm.sum(axis=1)[:, np.newaxis],
+ coloraxis=f"coloraxis{xaxis[1:]}",
+ text=cm,
+ customdata=labels,
+ texttemplate="%{text}
(%{z:.2f}%)",
+ textfont=dict(size=self.label_fontsize),
+ hovertemplate=(
+ "%{customdata}
" if is_binary(self.task) else ""
+ "x:%{x}
y:%{y}
z:%{z}"
+ ),
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ fig.update_layout(
+ {
+ "template": "plotly_white",
+ f"yaxis{yaxis[1:]}_autorange": "reversed",
+ f"xaxis{xaxis[1:]}_showgrid": False,
+ f"yaxis{yaxis[1:]}_showgrid": False,
+ }
+ )
+
+ else:
+ color = BasePlot._fig.get_elem(m.name)
+ fig.add_trace(
+ go.Bar(
+ x=cm.ravel(),
+ y=labels.ravel(),
+ orientation="h",
+ marker=dict(
+ color=f"rgba({color[4:-1]}, 0.2)",
+ line=dict(width=2, color=color),
+ ),
+ hovertemplate="%{x}",
+ name=m.name,
+ legendgroup=m.name,
+ showlegend=BasePlot._fig.showlegend(m.name, legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ fig.update_layout(bargroupgap=0.05)
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Predicted label" if len(models) == 1 else "Count",
+ ylabel="True label" if len(models) == 1 else None,
+ title=title,
+ legend=legend,
+ figsize=figsize or ((800, 800) if len(models) == 1 else (900, 600)),
+ plotname="plot_confusion_matrix",
+ filename=filename,
+ display=display,
+ )
+
+ @available_if(has_task(["binary", "multilabel"]))
+ @composed(crash, plot_from_model)
+ def plot_det(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ dataset: str | SEQUENCE = "test",
+ target: INT | str = 0,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "upper right",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ):
+ """Plot the Detection Error Tradeoff curve.
+
+ Read more about [DET][] in sklearn's documentation. Only
+ available for binary classification tasks.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ dataset: str or sequence, default="test"
+ Data set on which to calculate the metric. Use a sequence
+ or add `+` between options to select more than one. Choose
+ from: "train", "test" or "holdout".
+
+ target: int or str, default=0
+ Target column to look at. Only for [multilabel][] tasks.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="upper right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_gains
+ atom.plots:PredictionPlot.plot_roc
+ atom.plots:PredictionPlot.plot_prc
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import make_classification
+
+ X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(["LR", "RF"])
+ atom.plot_det()
+ ```
+
+ """
+ dataset = self._get_set(dataset, max_one=False)
+ target = self.branch._get_target(target, only_columns=True)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+ for m in models:
+ for ds in dataset:
+ # Get fpr-fnr pairs for different thresholds
+ fpr, fnr, _ = det_curve(*m._get_pred(ds, target, attr="thresh"))
+
+ fig.add_trace(
+ self._draw_line(
+ x=fpr,
+ y=fnr,
+ mode="lines",
+ parent=m.name,
+ child=ds,
+ legend=legend,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="FPR",
+ ylabel="FNR",
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_det",
+ filename=filename,
+ display=display,
+ )
+
+ @available_if(has_task("reg"))
+ @composed(crash, plot_from_model)
+ def plot_errors(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ dataset: str = "test",
+ target: INT | str = 0,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower right",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot a model's prediction errors.
+
+ Plot the actual targets from a set against the predicted values
+ generated by the regressor. A linear fit is made on the data.
+ The gray, intersected line shows the identity line. This plot
+ can be useful to detect noise or heteroscedasticity along a
+ range of the target domain. This plot is available only for
+ regression tasks.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ dataset: str, default="test"
+ Data set on which to calculate the metric. Choose from:
+ "train", "test" or "holdout".
+
+ target: int or str, default=0
+ Target column to look at. Only for [multioutput tasks][].
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_residuals
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMRegressor
+ from sklearn.datasets import load_diabetes
+
+ X, y = load_diabetes(return_X_y=True, as_frame=True)
+
+ atom = ATOMRegressor(X, y)
+ atom.run(["OLS", "LGB"])
+ atom.plot_errors()
+ ```
+
+ """
+ ds = self._get_set(dataset, max_one=True)
+ target = self.branch._get_target(target, only_columns=True)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+ for m in models:
+ y_true, y_pred = m._get_pred(ds, target)
+
+ fig.add_trace(
+ go.Scatter(
+ x=y_true,
+ y=y_pred,
+ mode="markers",
+ line=dict(width=2, color=BasePlot._fig.get_elem(m.name)),
+ name=m.name,
+ legendgroup=m.name,
+ showlegend=BasePlot._fig.showlegend(m.name, legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ # Fit the points using linear regression
+ from atom.models import OrdinaryLeastSquares
+ model = OrdinaryLeastSquares(goal=self.goal, branch=m.branch)._get_est()
+ model.fit(y_true.values.reshape(-1, 1), y_pred)
+
+ fig.add_trace(
+ go.Scatter(
+ x=(x := np.linspace(y_true.min(), y_true.max(), 100)),
+ y=model.predict(x[:, np.newaxis]),
+ mode="lines",
+ line=dict(width=2, color=BasePlot._fig.get_elem(m.name)),
+ hovertemplate="(%{x}, %{y})",
+ legendgroup=m.name,
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ self._draw_straight_line(y="diagonal", xaxis=xaxis, yaxis=yaxis)
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ groupclick="togglegroup",
+ xlabel="True value",
+ title=title,
+ legend=legend,
+ ylabel="Predicted value",
+ figsize=figsize,
+ plotname="plot_errors",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model(ensembles=False))
+ def plot_evals(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ dataset: str | SEQUENCE = "test",
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower right",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot evaluation curves.
+
+ The evaluation curves are the main metric scores achieved by the
+ models at every iteration of the training process. This plot is
+ available only for models that allow [in-training validation][].
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ dataset: str or sequence, default="test"
+ Data set on which to calculate the evaluation curves. Use a
+ sequence or add `+` between options to select more than one.
+ Choose from: "train" or "test".
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:HyperparameterTuningPlot.plot_trials
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import make_classification
+
+ X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(["XGB", "LGB"])
+ atom.plot_evals()
+ ```
+
+ """
+ dataset = self._get_set(dataset, max_one=False, allow_holdout=False)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+ for m in models:
+ if not m.evals:
+ raise ValueError(
+ "Invalid value for the models parameter. Model "
+ f"{m.name} has no in-training validation."
+ )
+
+ for ds in dataset:
+ fig.add_trace(
+ self._draw_line(
+ x=list(range(len(m.evals[f"{self._metric[0].name}_{ds}"]))),
+ y=m.evals[f"{self._metric[0].name}_{ds}"],
+ marker_symbol="circle",
+ parent=m.name,
+ child=ds,
+ legend=legend,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ BasePlot._fig.used_models.append(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Iterations",
+ ylabel=self._metric[0].name,
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_evals",
+ filename=filename,
+ display=display,
+ )
+
    @composed(crash, plot_from_model)
    def plot_feature_importance(
        self,
        models: INT | str | MODEL | slice | SEQUENCE | None = None,
        show: INT | None = None,
        *,
        title: str | dict | None = None,
        legend: str | dict | None = "lower right",
        figsize: tuple[INT, INT] | None = None,
        filename: str | None = None,
        display: bool | None = True,
    ) -> go.Figure | None:
        """Plot a model's feature importance.

        The sum of importances for all features (per model) is 1.
        This plot is available only for models whose estimator has
        a `scores_`, `feature_importances_` or `coef_` attribute.

        Parameters
        ----------
        models: int, str, Model, slice, sequence or None, default=None
            Models to plot. If None, all models are selected.

        show: int or None, default=None
            Number of features (ordered by importance) to show. If
            None, it shows all features.

        title: str, dict or None, default=None
            Title for the plot.

            - If None, no title is shown.
            - If str, text for the title.
            - If dict, [title configuration][parameters].

        legend: str, dict or None, default="lower right"
            Legend for the plot. See the [user guide][parameters] for
            an extended description of the choices.

            - If None: No legend is shown.
            - If str: Location where to show the legend.
            - If dict: Legend configuration.

        figsize: tuple or None, default=None
            Figure's size in pixels, format as (x, y). If None, it
            adapts the size to the number of features shown.

        filename: str or None, default=None
            Save the plot using this name. Use "auto" for automatic
            naming. The type of the file depends on the provided name
            (.html, .png, .pdf, etc...). If `filename` has no file type,
            the plot is saved as html. If None, the plot is not saved.

        display: bool or None, default=True
            Whether to render the plot. If None, it returns the figure.

        Returns
        -------
        [go.Figure][] or None
            Plot object. Only returned if `display=None`.

        See Also
        --------
        atom.plots:PredictionPlot.plot_parshap
        atom.plots:PredictionPlot.plot_partial_dependence
        atom.plots:PredictionPlot.plot_permutation_importance

        Examples
        --------
        ```pycon
        from atom import ATOMClassifier
        from sklearn.datasets import load_breast_cancer

        X, y = load_breast_cancer(return_X_y=True, as_frame=True)

        atom = ATOMClassifier(X, y, random_state=1)
        atom.run(["LR", "RF"])
        atom.plot_feature_importance(show=10)
        ```

        """
        # Resolve the number of features to display (validated vs models)
        show = self._get_show(show, models)

        fig = self._get_figure()
        xaxis, yaxis = BasePlot._fig.get_axes()
        for m in models:
            # feature_importance is None when the estimator exposes no
            # importance attribute; fail early with a clear message
            if (fi := m.feature_importance) is None:
                raise ValueError(
                    "Invalid value for the models parameter. The estimator "
                    f"{m.estimator.__class__.__name__} has no feature_importances_ "
                    "nor coef_ attribute."
                )

            # One horizontal bar per feature; fill is a translucent
            # version of the model's line color
            fig.add_trace(
                go.Bar(
                    x=fi,
                    y=fi.index,
                    orientation="h",
                    marker=dict(
                        color=f"rgba({BasePlot._fig.get_elem(m.name)[4:-1]}, 0.2)",
                        line=dict(width=2, color=BasePlot._fig.get_elem(m.name)),
                    ),
                    hovertemplate="%{x}",
                    name=m.name,
                    legendgroup=m.name,
                    showlegend=BasePlot._fig.showlegend(m.name, legend),
                    xaxis=xaxis,
                    yaxis=yaxis,
                )
            )

        # Order bars by total importance across models
        fig.update_layout(
            {
                f"yaxis{yaxis[1:]}": dict(categoryorder="total ascending"),
                "bargroupgap": 0.05,
            }
        )

        # Unique number of features over all branches
        n_fxs = len(set([fx for m in models for fx in m.features]))

        BasePlot._fig.used_models.extend(models)
        return self._plot(
            ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
            xlabel="Normalized feature importance",
            # ylim clips the category axis to the `show` most important features
            ylim=(n_fxs - show - 0.5, n_fxs - 0.5),
            title=title,
            legend=legend,
            figsize=figsize or (900, 400 + show * 50),
            plotname="plot_feature_importance",
            filename=filename,
            display=display,
        )
+
    @available_if(has_task("forecast"))
    @composed(crash, plot_from_model(check_fitted=False))
    def plot_forecast(
        self,
        models: INT | str | MODEL | slice | SEQUENCE | None = None,
        fh: int | str | range | SEQUENCE | ForecastingHorizon = "test",
        X: FEATURES | None = None,
        target: INT | str = 0,
        plot_interval: bool = True,
        *,
        title: str | dict | None = None,
        legend: str | dict | None = "upper left",
        figsize: tuple[INT, INT] = (900, 600),
        filename: str | None = None,
        display: bool | None = True,
    ) -> go.Figure | None:
        """Plot a time series with model forecasts.

        This plot is only available for forecasting tasks.

        Parameters
        ----------
        models: int, str, Model, slice, sequence or None, default=None
            Models to plot. If None, all models are selected. If no
            models are selected, only the target column is plotted.

        fh: int, str, range, sequence or [ForecastingHorizon][], default="test"
            Forecast horizon for which to plot the predictions. If
            string, choose from: "train", "test" or "holdout". Use a
            sequence or add `+` between options to select more than one.

        X: dataframe-like or None, default=None
            Exogenous time series corresponding to fh. This parameter
            is ignored if fh is a data set.

        target: int or str, default=0
            Target column to look at. Only for [multivariate][] tasks.

        plot_interval: bool, default=True
            Whether to plot prediction intervals instead of the exact
            prediction values. If True, the plotted estimators should
            have a `predict_interval` method.

        title: str, dict or None, default=None
            Title for the plot.

            - If None, no title is shown.
            - If str, text for the title.
            - If dict, [title configuration][parameters].

        legend: str, dict or None, default="upper left"
            Legend for the plot. See the [user guide][parameters] for
            an extended description of the choices.

            - If None: No legend is shown.
            - If str: Location where to show the legend.
            - If dict: Legend configuration.

        figsize: tuple, default=(900, 600)
            Figure's size in pixels, format as (x, y).

        filename: str or None, default=None
            Save the plot using this name. Use "auto" for automatic
            naming. The type of the file depends on the provided name
            (.html, .png, .pdf, etc...). If `filename` has no file type,
            the plot is saved as html. If None, the plot is not saved.

        display: bool or None, default=True
            Whether to render the plot. If None, it returns the figure.

        Returns
        -------
        [go.Figure][] or None
            Plot object. Only returned if `display=None`.

        See Also
        --------
        atom.plots:PredictionPlot.plot_lift
        atom.plots:PredictionPlot.plot_prc
        atom.plots:PredictionPlot.plot_roc

        Examples
        --------
        ```pycon
        from atom import ATOMForecaster
        from sktime.datasets import load_airline

        y = load_airline()

        atom = ATOMForecaster(y, random_state=1)
        atom.plot_forecast()
        atom.run(
            models="arima",
            est_params={"order": (1, 1, 0), "seasonal_order": (0, 1, 0, 12)},
        )
        atom.plot_forecast()
        atom.plot_forecast(fh="train+test", plot_interval=False)

        # Forecast the next 4 years starting from the test set
        atom.plot_forecast(fh=range(1, 48))
        ```

        """
        target = self.branch._get_target(target, only_columns=True)

        fig = self._get_figure()
        xaxis, yaxis = BasePlot._fig.get_axes()

        # Draw original time series
        for ds in ("train", "test"):
            fig.add_trace(
                go.Scatter(
                    x=self._get_plot_index(getattr(self, ds)),
                    y=getattr(self, ds)[target],
                    mode="lines+markers",
                    line=dict(
                        width=2,
                        color="black",
                        dash=BasePlot._fig.get_elem(ds, "dash"),
                    ),
                    opacity=0.6,
                    name=ds,
                    # Only label the raw series when no model forecast is drawn
                    showlegend=False if models else BasePlot._fig.showlegend(ds, legend),
                    xaxis=xaxis,
                    yaxis=yaxis,
                )
            )

        # Draw predictions
        for m in models:
            if isinstance(fh, str):
                # Get fh and corresponding X from data set
                # NOTE(review): fh and X are overwritten here on the first
                # model and reused as-is for subsequent models — presumably
                # intentional since the resolved horizon is identical;
                # confirm if branches can differ per model
                datasets = self._get_set(fh, max_one=False)
                fh = bk.concat([getattr(m, ds) for ds in datasets]).index
                X = m.X.loc[fh]

            y_pred = m.predict(fh, X)
            if is_multioutput(self.task):
                # Keep only the requested target column for multivariate
                y_pred = y_pred[target]

            fig.add_trace(
                self._draw_line(
                    x=self._get_plot_index(y_pred),
                    y=y_pred,
                    mode="lines+markers",
                    parent=m.name,
                    legend=legend,
                    xaxis=xaxis,
                    yaxis=yaxis,
                )
            )

            if plot_interval:
                try:
                    y_pred = m.predict_interval(fh, X)
                except NotImplementedError:
                    continue  # Fails for some models like ES

                if is_multioutput(self.task):
                    # Select interval of target column for multivariate
                    y = y_pred.iloc[:, y_pred.columns.get_loc(target)]
                else:
                    y = y_pred  # Univariate

                # Upper bound first, then lower bound with fill="tonexty"
                # so the band is shaded between the two traces
                fig.add_traces(
                    [
                        go.Scatter(
                            x=self._get_plot_index(y_pred),
                            y=y.iloc[:, 1],
                            mode="lines",
                            line=dict(width=1, color=BasePlot._fig.get_elem(m.name)),
                            hovertemplate=f"%{{y}}{m.name} - upper bound",
                            legendgroup=m.name,
                            showlegend=False,
                            xaxis=xaxis,
                            yaxis=yaxis,
                        ),
                        go.Scatter(
                            x=self._get_plot_index(y_pred),
                            y=y.iloc[:, 0],
                            mode="lines",
                            line=dict(width=1, color=BasePlot._fig.get_elem(m.name)),
                            fill="tonexty",
                            # "rgb(r,g,b)"[3:-1] -> "(r,g,b"; closed by ", 0.2)"
                            fillcolor=f"rgba{BasePlot._fig.get_elem(m.name)[3:-1]}, 0.2)",
                            hovertemplate=f"%{{y}}{m.name} - lower bound",
                            legendgroup=m.name,
                            showlegend=False,
                            xaxis=xaxis,
                            yaxis=yaxis,
                        )
                    ]
                )

        BasePlot._fig.used_models.extend(models)
        return self._plot(
            ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
            groupclick="togglegroup" if plot_interval else "toggleitem",
            xlabel=self.y.index.name,
            ylabel=target,
            title=title,
            legend=legend,
            figsize=figsize,
            plotname="plot_forecast",
            filename=filename,
            display=display,
        )
+
+ @available_if(has_task(["binary", "multilabel"]))
+ @composed(crash, plot_from_model)
+ def plot_gains(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ dataset: str | SEQUENCE = "test",
+ target: INT | str = 0,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower right",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the cumulative gains curve.
+
+ This plot is available only for binary and [multilabel][]
+ classification tasks.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ dataset: str or sequence, default="test"
+ Data set on which to calculate the metric. Use a sequence
+ or add `+` between options to select more than one. Choose
+ from: "train", "test" or "holdout".
+
+ target: int or str, default=0
+ Target column to look at. Only for [multilabel][] tasks.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_det
+ atom.plots:PredictionPlot.plot_lift
+ atom.plots:PredictionPlot.plot_roc
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import make_classification
+
+ X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(["LR", "RF"])
+ atom.plot_gains()
+ ```
+
+ """
+ dataset = self._get_set(dataset, max_one=False)
+ target = self.branch._get_target(target, only_columns=True)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+ for m in models:
+ for ds in dataset:
+ y_true, y_pred = m._get_pred(ds, target, attr="thresh")
+
+ fig.add_trace(
+ self._draw_line(
+ x=np.arange(start=1, stop=len(y_true) + 1) / len(y_true),
+ y=np.cumsum(y_true.iloc[np.argsort(y_pred)[::-1]]) / y_true.sum(),
+ mode="lines",
+ parent=m.name,
+ child=ds,
+ legend=legend,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ self._draw_straight_line(y="diagonal", xaxis=xaxis, yaxis=yaxis)
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Fraction of sample",
+ ylabel="Gain",
+ xlim=(0, 1),
+ ylim=(0, 1.02),
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_gains",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model(ensembles=False))
+ def plot_learning_curve(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ metric: INT | str | SEQUENCE | None = None,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower right",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the learning curve: score vs number of training samples.
+
+ This plot is available only for models fitted using
+ [train sizing][]. [Ensembles][] are ignored.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ metric: int, str, sequence or None, default=None
+ Metric to plot (only for multi-metric runs). Use a sequence
+ or add `+` between options to select more than one. If None,
+ the metric used to run the pipeline is selected.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_results
+ atom.plots:PredictionPlot.plot_successive_halving
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.train_sizing(["LR", "RF"], n_bootstrap=5)
+ atom.plot_learning_curve()
+ ```
+
+ """
+ metric = self._get_metric(metric, max_one=False)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+
+ for met in metric:
+ x, y, std = defaultdict(list), defaultdict(list), defaultdict(list)
+ for m in models:
+ x[m._group].append(m._train_idx)
+ y[m._group].append(get_best_score(m, met))
+ if m.bootstrap is not None:
+ std[m._group].append(m.bootstrap.iloc[:, met].std())
+
+ for group in x:
+ fig.add_trace(
+ self._draw_line(
+ x=x[group],
+ y=y[group],
+ mode="lines+markers",
+ marker_symbol="circle",
+ error_y=dict(type="data", array=std[group], visible=True),
+ parent=group,
+ child=self._metric[met].name,
+ legend=legend,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ # Add error bands
+ if m.bootstrap is not None:
+ fillcolor = f"rgba{BasePlot._fig.get_elem(group)[3:-1]}, 0.2)"
+ fig.add_traces(
+ [
+ go.Scatter(
+ x=x[group],
+ y=np.add(y[group], std[group]),
+ mode="lines",
+ line=dict(width=1, color=BasePlot._fig.get_elem(group)),
+ hovertemplate="%{y}upper bound",
+ legendgroup=group,
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ ),
+ go.Scatter(
+ x=x[group],
+ y=np.subtract(y[group], std[group]),
+ mode="lines",
+ line=dict(width=1, color=BasePlot._fig.get_elem(group)),
+ fill="tonexty",
+ fillcolor=fillcolor,
+ hovertemplate="%{y}lower bound",
+ legendgroup=group,
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ ),
+ ]
+ )
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ groupclick="togglegroup",
+ title=title,
+ legend=legend,
+ xlabel="Number of training samples",
+ ylabel="Score",
+ figsize=figsize,
+ plotname="plot_learning_curve",
+ filename=filename,
+ display=display,
+ )
+
+ @available_if(has_task(["binary", "multilabel"]))
+ @composed(crash, plot_from_model)
+ def plot_lift(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ dataset: str | SEQUENCE = "test",
+ target: INT | str = 0,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "upper right",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the lift curve.
+
+ Only available for binary classification tasks.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ dataset: str or sequence, default="test"
+ Data set on which to calculate the metric. Use a sequence
+ or add `+` between options to select more than one. Choose
+ from: "train", "test" or "holdout".
+
+ target: int or str, default=0
+ Target column to look at. Only for [multilabel][] tasks.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="upper right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_det
+ atom.plots:PredictionPlot.plot_gains
+ atom.plots:PredictionPlot.plot_prc
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import make_classification
+
+ X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(["LR", "RF"])
+ atom.plot_lift()
+ ```
+
+ """
+ dataset = self._get_set(dataset, max_one=False)
+ target = self.branch._get_target(target, only_columns=True)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+ for m in models:
+ for ds in dataset:
+ y_true, y_pred = m._get_pred(ds, target, attr="thresh")
+
+ gains = np.cumsum(y_true.iloc[np.argsort(y_pred)[::-1]]) / y_true.sum()
+ fig.add_trace(
+ self._draw_line(
+ x=(x := np.arange(start=1, stop=len(y_true) + 1) / len(y_true)),
+ y=gains / x,
+ mode="lines",
+ parent=m.name,
+ child=ds,
+ legend=legend,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ self._draw_straight_line(y=1, xaxis=xaxis, yaxis=yaxis)
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Fraction of sample",
+ ylabel="Lift",
+ xlim=(0, 1),
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_lift",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model)
+ def plot_parshap(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ columns: SLICE | None = None,
+ target: INT | str | tuple = 1,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "upper left",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the partial correlation of shap values.
+
+ Plots the train and test correlation between the shap value of
+ every feature with its target value, after removing the effect
+ of all other features (partial correlation). This plot is
+ useful to identify the features that are contributing most to
+ overfitting. Features that lie below the bisector (diagonal
+ line) performed worse on the test set than on the training set.
+ If the estimator has a `scores_`, `feature_importances_` or
+ `coef_` attribute, its normalized values are shown in a color
+ map.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ columns: int, str, slice, sequence or None, default=None
+ Features to plot. If None, it plots all features.
+
+ target: int, str or tuple, default=1
+ Class in the target column to target. For multioutput tasks,
+ the value should be a tuple of the form (column, class).
+ Note that for binary and multilabel tasks, the selected
+ class is always the positive one.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="upper left"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_feature_importance
+ atom.plots:PredictionPlot.plot_partial_dependence
+ atom.plots:PredictionPlot.plot_permutation_importance
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(["GNB", "RF"])
+ atom.rf.plot_parshap(legend=None)
+ atom.plot_parshap(columns=slice(5, 10))
+ ```
+
+ """
+ target = self.branch._get_target(target)
+
+ fig = self._get_figure()
+
+ # Colorbar is only needed when a model has feature_importance
+ if all(m.feature_importance is None for m in models):
+ xaxis, yaxis = BasePlot._fig.get_axes()
+ else:
+ xaxis, yaxis = BasePlot._fig.get_axes(
+ x=(0, 0.87),
+ coloraxis=dict(
+ colorscale="Reds",
+ title="Normalized feature importance",
+ font_size=self.label_fontsize,
+ )
+ )
+
+ for m in models:
+ parshap = {}
+ fxs = m.branch._get_columns(columns, include_target=False)
+
+ for ds in ("train", "test"):
+ # Calculating shap values is computationally expensive,
+ # therefore select a random subsample for large data sets
+ if len(data := getattr(m, ds)) > 500:
+ data = data.sample(500, random_state=self.random_state)
+
+ # Replace data with the calculated shap values
+ explanation = m._shap.get_explanation(data[m.features], target)
+ data[m.features] = explanation.values
+
+ parshap[ds] = pd.Series(index=fxs, dtype=float)
+ for fx in fxs:
+ # All other features are covariates
+ covariates = [f for f in data.columns[:-1] if f != fx]
+ cols = [fx, data.columns[-1], *covariates]
+
+ # Compute covariance
+ V = data[cols].cov()
+
+ # Inverse covariance matrix
+ Vi = np.linalg.pinv(V, hermitian=True)
+ diag = Vi.diagonal()
+
+ D = np.diag(np.sqrt(1 / diag))
+
+ # Partial correlation matrix
+ partial_corr = -1 * (D @ Vi @ D) # @ is matrix multiplication
+
+ # Semi-partial correlation matrix
+ with np.errstate(divide="ignore"):
+ V_sqrt = np.sqrt(np.diag(V))[..., None]
+ Vi_sqrt = np.sqrt(np.abs(diag - Vi ** 2 / diag[..., None])).T
+ semi_partial_correlation = partial_corr / V_sqrt / Vi_sqrt
+
+ # X covariates are removed
+ parshap[ds][fx] = semi_partial_correlation[1, 0]
+
+ # Get the feature importance or coefficients
+ if m.feature_importance is not None:
+ color = m.feature_importance.loc[fxs]
+ else:
+ color = BasePlot._fig.get_elem("parshap")
+
+ fig.add_trace(
+ go.Scatter(
+ x=parshap["train"],
+ y=parshap["test"],
+ mode="markers+text",
+ marker=dict(
+ color=color,
+ size=self.marker_size,
+ coloraxis=f"coloraxis{xaxis[1:]}",
+ line=dict(width=1, color="rgba(255, 255, 255, 0.9)"),
+ ),
+ text=m.features,
+ textposition="top center",
+ customdata=(data := None if isinstance(color, str) else list(color)),
+ hovertemplate=(
+ f"%{{text}}
(%{{x}}, %{{y}})"
+ f"{'
Feature importance: %{customdata:.4f}' if data else ''}"
+ f"{m.name}"
+ ),
+ name=m.name,
+ legendgroup=m.name,
+ showlegend=BasePlot._fig.showlegend(m.name, legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ self._draw_straight_line(y="diagonal", xaxis=xaxis, yaxis=yaxis)
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Training set",
+ ylabel="Test set",
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_parshap",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model)
+ def plot_partial_dependence(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ columns: SLICE | None = None,
+ kind: str | SEQUENCE = "average",
+ pair: int | str | None = None,
+ target: INT | str = 1,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower right",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the partial dependence of features.
+
+ The partial dependence of a feature (or a set of features)
+ corresponds to the response of the model for each possible
+ value of the feature. The plot can take two forms:
+
+ - If `pair` is None: Single feature partial dependence lines.
+ The deciles of the feature values are shown with tick marks
+ on the bottom.
+ - If `pair` is defined: Two-way partial dependence plots are
+ plotted as contour plots (only allowed for a single model).
+
+ Read more about partial dependence on sklearn's
+ [documentation][partial_dependence]. This plot is not available
+ for multilabel nor multiclass-multioutput classification tasks.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ columns: int, str, slice, sequence or None, default=None
+ Features to get the partial dependence from. If None, it
+ uses the first 3 features in the dataset.
+
+ kind: str or sequence, default="average"
+ Kind of depedence to plot. Use a sequence or add `+` between
+ options to select more than one. Choose from:
+
+ - "average": Partial dependence averaged across all samples
+ in the dataset.
+ - "individual": Partial dependence for up to 50 random
+ samples (Individual Conditional Expectation).
+
+ This parameter is ignored when plotting feature pairs.
+
+ pair: int, str or None, default=None
+ Feature with which to pair the features selected by
+ `columns`. If specified, the resulting figure displays
+ contour plots. Only allowed when plotting a single model.
+ If None, the plots show the partial dependece of single
+ features.
+
+ target: int or str, default=1
+ Class in the target column to look at (only for multiclass
+ classification tasks).
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_feature_importance
+ atom.plots:PredictionPlot.plot_parshap
+ atom.plots:PredictionPlot.plot_permutation_importance
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(["LR", "RF"])
+ atom.plot_partial_dependence(kind="average+individual", legend="upper left")
+ atom.rf.plot_partial_dependence(columns=(3, 4), pair=2)
+ ```
+
+ """
+ if any(self.task.startswith(t) for t in ("multilabel", "multiclass-multioutput")):
+ raise PermissionError(
+ "The plot_partial_dependence method is not available for multilabel "
+ f"nor multiclass-multioutput classification tasks, got {self.task}."
+ )
+ elif self.task.startswith("multiclass"):
+ _, target = self.branch._get_target(target)
+ else:
+ target = 0
+
+ kind = "+".join(lst(kind)).lower()
+ if any(k not in ("average", "individual") for k in kind.split("+")):
+ raise ValueError(
+ f"Invalid value for the kind parameter, got {kind}. "
+ "Choose from: average, individual."
+ )
+
+ axes, names = [], []
+ fig = self._get_figure()
+ for m in models:
+ color = BasePlot._fig.get_elem(m.name)
+
+ # Since every model can have different fxs, select them
+ # every time and make sure the models use the same fxs
+ cols = m.branch._get_columns(
+ columns=(0, 1, 2) if columns is None else columns,
+ include_target=False,
+ )
+
+ if not names:
+ names = cols
+ elif names != cols:
+ raise ValueError(
+ "Invalid value for the columns parameter. Not all "
+ f"models use the same features, got {names} and {cols}."
+ )
+
+ if pair is not None:
+ if len(models) > 1:
+ raise ValueError(
+ f"Invalid value for the pair parameter, got {pair}. "
+ "The value must be None when plotting multiple models"
+ )
+ else:
+ pair = m.branch._get_columns(pair, include_target=False)
+ cols = [(c, pair[0]) for c in cols]
+ else:
+ cols = [(c,) for c in cols]
+
+ # Create new axes
+ if not axes:
+ for i, col in enumerate(cols):
+ # Calculate the distance between subplots
+ offset = divide(0.025, len(cols) - 1)
+
+ # Calculate the size of the subplot
+ size = (1 - ((offset * 2) * (len(cols) - 1))) / len(cols)
+
+ # Determine the position for the axes
+ x_pos = i % len(cols) * (size + 2 * offset)
+
+ xaxis, yaxis = BasePlot._fig.get_axes(x=(x_pos, rnd(x_pos + size)))
+ axes.append((xaxis, yaxis))
+
+ # Compute averaged predictions
+ predictions = Parallel(n_jobs=self.n_jobs, backend=self.backend)(
+ delayed(partial_dependence)(
+ estimator=m.estimator,
+ X=m.X_test,
+ features=col,
+ kind="both" if "individual" in kind else "average",
+ ) for col in cols
+ )
+
+ # Compute deciles for ticks (only if line plots)
+ if len(cols[0]) == 1:
+ deciles = {}
+ for fx in chain.from_iterable(cols):
+ if fx not in deciles: # Skip if the feature is repeated
+ X_col = _safe_indexing(m.X_test, fx, axis=1)
+ deciles[fx] = mquantiles(X_col, prob=np.arange(0.1, 1.0, 0.1))
+
+ for i, (ax, fx, pred) in enumerate(zip(axes, cols, predictions)):
+ # Draw line or contour plot
+ if len(pred["values"]) == 1:
+ # For both average and individual: draw ticks on the horizontal axis
+ for line in deciles[fx[0]]:
+ fig.add_shape(
+ type="line",
+ x0=line,
+ x1=line,
+ xref=ax[0],
+ y0=0,
+ y1=0.05,
+ yref=f"{axes[0][1]} domain",
+ line=dict(width=1, color=BasePlot._fig.get_elem(m.name)),
+ opacity=0.6,
+ layer="below",
+ )
+
+ # Draw the mean of the individual lines
+ if "average" in kind:
+ fig.add_trace(
+ go.Scatter(
+ x=pred["values"][0],
+ y=pred["average"][target].ravel(),
+ mode="lines",
+ line=dict(width=2, color=color),
+ name=m.name,
+ legendgroup=m.name,
+ showlegend=BasePlot._fig.showlegend(m.name, legend),
+ xaxis=ax[0],
+ yaxis=axes[0][1],
+ )
+ )
+
+ # Draw all individual (per sample) lines (ICE)
+ if "individual" in kind:
+ # Select up to 50 random samples to plot
+ idx = np.random.choice(
+ list(range(len(pred["individual"][target]))),
+ size=min(len(pred["individual"][target]), 50),
+ replace=False,
+ )
+ for sample in pred["individual"][target, idx, :]:
+ fig.add_trace(
+ go.Scatter(
+ x=pred["values"][0],
+ y=sample,
+ mode="lines",
+ line=dict(width=0.5, color=color),
+ name=m.name,
+ legendgroup=m.name,
+ showlegend=BasePlot._fig.showlegend(m.name, legend),
+ xaxis=ax[0],
+ yaxis=axes[0][1],
+ )
+ )
+
+ else:
+ colorscale = PALETTE.get(BasePlot._fig.get_elem(m.name), "Teal")
+ fig.add_trace(
+ go.Contour(
+ x=pred["values"][0],
+ y=pred["values"][1],
+ z=pred["average"][target],
+ contours=dict(
+ showlabels=True,
+ labelfont=dict(size=self.tick_fontsize, color="white")
+ ),
+ hovertemplate="x:%{x}
y:%{y}
z:%{z}",
+ hoverongaps=False,
+ colorscale=colorscale,
+ showscale=False,
+ showlegend=False,
+ xaxis=ax[0],
+ yaxis=axes[0][1],
+ )
+ )
+
+ self._plot(
+ ax=(f"xaxis{ax[0][1:]}", f"yaxis{ax[1][1:]}"),
+ xlabel=fx[0],
+ ylabel=(fx[1] if len(fx) > 1 else "Score") if i == 0 else None,
+ )
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ groupclick="togglegroup",
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_partial_dependence",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model)
+ def plot_permutation_importance(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ show: INT | None = None,
+ n_repeats: INT = 10,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower right",
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the feature permutation importance of models.
+
+ !!! warning
+ This method can be slow. Results are cached to fasten
+ repeated calls.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ show: int or None, default=None
+ Number of features (ordered by importance) to show. If
+ None, it shows all features.
+
+ n_repeats: int, default=10
+ Number of times to permute each feature.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the number of features shown.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_feature_importance
+ atom.plots:PredictionPlot.plot_partial_dependence
+ atom.plots:PredictionPlot.plot_parshap
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(["LR", "RF"])
+ atom.plot_permutation_importance(show=10, n_repeats=7)
+ ```
+
+ """
+ show = self._get_show(show, models)
+
+ if n_repeats <= 0:
+ raise ValueError(
+ "Invalid value for the n_repeats parameter."
+ f"Value should be >0, got {n_repeats}."
+ )
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+
+ for m in models:
+ # Permutation importances returns Bunch object
+ permutations = self._memory.cache(permutation_importance)(
+ estimator=m.estimator,
+ X=m.X_test,
+ y=m.y_test,
+ scoring=self._metric[0],
+ n_repeats=n_repeats,
+ n_jobs=self.n_jobs,
+ random_state=self.random_state,
+ )
+
+ fig.add_trace(
+ go.Box(
+ x=permutations["importances"].ravel(),
+ y=list(np.array([[fx] * n_repeats for fx in m.features]).ravel()),
+ marker_color=BasePlot._fig.get_elem(m.name),
+ boxpoints="outliers",
+ orientation="h",
+ name=m.name,
+ legendgroup=m.name,
+ showlegend=BasePlot._fig.showlegend(m.name, legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ fig.update_layout(
+ {
+ f"yaxis{yaxis[1:]}": dict(categoryorder="total ascending"),
+ "boxmode": "group",
+ }
+ )
+
+ # Unique number of features over all branches
+ n_fxs = len(set([fx for m in models for fx in m.features]))
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Score",
+ ylim=(n_fxs - show - 0.5, n_fxs - 0.5),
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 400 + show * 50),
+ plotname="plot_permutation_importance",
+ filename=filename,
+ display=display,
+ )
+
    @composed(crash, plot_from_model(check_fitted=False))
    def plot_pipeline(
        self,
        models: INT | str | MODEL | slice | SEQUENCE | None = None,
        draw_hyperparameter_tuning: bool = True,
        color_branches: bool | None = None,
        *,
        title: str | dict | None = None,
        legend: LEGEND | dict | None = None,
        figsize: tuple[INT, INT] | None = None,
        filename: str | None = None,
        display: bool | None = True,
    ) -> plt.Figure | None:
        """Plot a diagram of the pipeline.

        !!! warning
            This plot uses the [schemdraw][] package, which is
            incompatible with [plotly][]. The returned plot is
            therefore a [matplotlib figure][pltfigure].

        Parameters
        ----------
        models: int, str, Model, slice, sequence or None, default=None
            Models for which to draw the pipeline. If None, all
            pipelines are plotted.

        draw_hyperparameter_tuning: bool, default=True
            Whether to draw if the models used Hyperparameter Tuning.

        color_branches: bool or None, default=None
            Whether to draw every branch in a different color. If None,
            branches are colored when there is more than one.

        title: str, dict or None, default=None
            Title for the plot.

            - If None, no title is shown.
            - If str, text for the title.
            - If dict, [title configuration][parameters].

        legend: str, dict or None, default=None
            Does nothing. Implemented for continuity of the API.

        figsize: tuple or None, default=None
            Figure's size in pixels, format as (x, y). If None, it
            adapts the size to the pipeline drawn.

        filename: str or None, default=None
            Save the plot using this name. Use "auto" for automatic
            naming. The type of the file depends on the provided name
            (.html, .png, .pdf, etc...). If `filename` has no file type,
            the plot is saved as png. If None, the plot is not saved.

        display: bool or None, default=True
            Whether to render the plot. If None, it returns the figure.

        Returns
        -------
        [plt.Figure][] or None
            Plot object. Only returned if `display=None`.

        See Also
        --------
        atom.plots:DataPlot.plot_wordcloud

        Examples
        --------
        ```pycon
        from atom import ATOMClassifier
        from sklearn.datasets import load_breast_cancer

        X, y = load_breast_cancer(return_X_y=True, as_frame=True)

        atom = ATOMClassifier(X, y, random_state=1)
        atom.run(["GNB", "RNN", "SGD", "MLP"])
        atom.voting(models=atom.winners[:2])
        atom.plot_pipeline()

        atom = ATOMClassifier(X, y, random_state=1)
        atom.scale()
        atom.prune()
        atom.run("RF", n_trials=30)

        atom.branch = "undersample"
        atom.balance("nearmiss")
        atom.run("RF_undersample")

        atom.branch = "oversample_from_master"
        atom.balance("smote")
        atom.run("RF_oversample")

        atom.plot_pipeline()
        ```

        """

        def get_length(pl, i):
            """Get the maximum length of the name of a block."""
            # 0.5 is the approximate width per character; 7 is the minimum
            # block width so short names still get a readable box
            if len(pl) > i:
                return max(len(pl[i].__class__.__name__) * 0.5, 7)
            else:
                return 0

        def check_y(xy):
            """Return y unless there is something right, then jump."""
            # Step down one block height at a time until no already-drawn
            # element lies to the right on the same row (avoids overlap)
            while any(pos[0] > xy[0] and pos[1] == xy[1] for pos in positions.values()):
                xy = Point((xy[0], xy[1] + height))

            return xy[1]

        def add_wire(x, y):
            """Draw a connecting wire between two estimators."""
            d.add(
                Wire(shape="z", k=(x - d.here[0]) / (length + 1), arrow="->")
                .to((x, y))
                .color(branch["color"])
            )

            # Update arrowhead manually
            d.elements[-1].segments[-1].arrowwidth = 0.3
            d.elements[-1].segments[-1].arrowlength = 0.5

        check_dependency("schemdraw")
        from schemdraw import Drawing
        from schemdraw.flow import Data, RoundBox, Subroutine, Wire
        from schemdraw.util import Point

        fig = self._get_figure(backend="matplotlib")
        check_canvas(BasePlot._fig.is_canvas, "plot_pipeline")

        # Define branches to plot (if called from model, it's only one)
        branches = []
        for branch in getattr(self, "_branches", [self.branch]):
            draw_models, draw_ensembles = [], []
            for m in models:
                if m.branch is branch:
                    if m.acronym not in ("Stack", "Vote"):
                        draw_models.append(m)
                    else:
                        draw_ensembles.append(m)

                        # Additionally, add all dependent models (if not already there)
                        draw_models.extend([i for i in m._models if i not in draw_models])

            if not models or draw_models:
                branches.append(
                    {
                        "name": branch.name,
                        "pipeline": list(branch.pipeline),
                        "models": draw_models,
                        "ensembles": draw_ensembles,
                    }
                )

        # Define colors per branch
        for branch in branches:
            if color_branches or (color_branches is None and len(branches) > 1):
                color = next(BasePlot._fig.palette)

                # Convert back to format accepted by matplotlib
                branch["color"] = unconvert_from_RGB_255(unlabel_rgb(color))
            else:
                branch["color"] = "black"

        # Create schematic drawing
        d = Drawing(unit=1, backend="matplotlib")
        d.config(fontsize=self.tick_fontsize)
        d.add(Subroutine(w=8, s=0.7).label("Raw data"))

        height = 3  # Height of every block
        length = 5  # Minimum arrow length

        # Define the x-position for every block
        # (column i must be wide enough for the widest block of any branch)
        x_pos = [d.here[0] + length]
        for i in range(max(len(b["pipeline"]) for b in branches)):
            len_block = reduce(max, [get_length(b["pipeline"], i) for b in branches])
            x_pos.append(x_pos[-1] + length + len_block)

        # Add positions for scaling, hyperparameter tuning and models
        # (x_pos[-3] = scaler column, x_pos[-2] = tuning column, x_pos[-1] = model column)
        x_pos.extend([x_pos[-1], x_pos[-1]])
        if any(m.scaler for m in models):
            x_pos[-1] = x_pos[-2] = x_pos[-3] + length + 7
        if draw_hyperparameter_tuning and any(m.trials is not None for m in models):
            x_pos[-1] = x_pos[-2] + length + 11

        positions = {0: d.here}  # Contains the position of every element
        for branch in branches:
            # Every branch starts drawing from the raw data block
            d.here = positions[0]

            for i, est in enumerate(branch["pipeline"]):
                # If the estimator has already been seen, don't draw
                if id(est) in positions:
                    # Change location to estimator's end
                    d.here = positions[id(est)]
                    continue

                # Draw transformer
                add_wire(x_pos[i], check_y(d.here))
                d.add(
                    RoundBox(w=max(len(est.__class__.__name__) * 0.5, 7))
                    .label(est.__class__.__name__, color="k")
                    .color(branch["color"])
                    .anchor("W")
                    .drop("E")
                )

                positions[id(est)] = d.here

            for model in branch["models"]:
                # Position at last transformer or at start
                if branch["pipeline"]:
                    d.here = positions[id(est)]
                else:
                    d.here = positions[0]

                # For a single branch, center models
                if len(branches) == 1:
                    offset = height * (len(branch["models"]) - 1) / 2
                else:
                    offset = 0

                # Draw automated feature scaling
                if model.scaler:
                    add_wire(x_pos[-3], check_y((d.here[0], d.here[1] - offset)))
                    d.add(
                        RoundBox(w=7)
                        .label("Scaler", color="k")
                        .color(branch["color"])
                        .drop("E")
                    )
                    offset = 0  # Offset already applied by the first wire

                # Draw hyperparameter tuning
                if draw_hyperparameter_tuning and model.trials is not None:
                    add_wire(x_pos[-2], check_y((d.here[0], d.here[1] - offset)))
                    d.add(
                        Data(w=11)
                        .label("Hyperparameter\nTuning", color="k")
                        .color(branch["color"])
                        .drop("E")
                    )
                    offset = 0

                # Remove classifier/regressor from model's name
                name = model.estimator.__class__.__name__
                if name.lower().endswith("classifier"):
                    name = name[:-10]
                elif name.lower().endswith("regressor"):
                    name = name[:-9]

                # Draw model
                add_wire(x_pos[-1], check_y((d.here[0], d.here[1] - offset)))
                d.add(
                    Data(w=max(len(name) * 0.5, 7))
                    .label(name, color="k")
                    .color(branch["color"])
                    .anchor("W")
                    .drop("E")
                )

                positions[id(model)] = d.here

        # Draw ensembles
        max_pos = max(pos[0] for pos in positions.values())  # Max length model names
        for branch in branches:
            for model in branch["ensembles"]:
                # Determine y-position of the ensemble: midway between
                # its constituent models, shifted per additional ensemble
                y_pos = [positions[id(m)][1] for m in model._models]
                offset = height / 2 * (len(branch["ensembles"]) - 1)
                y = min(y_pos) + (max(y_pos) - min(y_pos)) * 0.5 - offset
                y = check_y((max_pos + length, max(min(y_pos), y)))

                d.here = (max_pos + length, y)

                d.add(
                    Data(w=max(len(model._fullname) * 0.5, 7))
                    .label(model._fullname, color="k")
                    .color(branch["color"])
                    .anchor("W")
                    .drop("E")
                )

                positions[id(model)] = d.here

                # Draw a wire from every model to the ensemble
                for m in model._models:
                    d.here = positions[id(m)]
                    add_wire(max_pos + length, y)

        if not figsize:
            # Heuristic: scale the figure to the drawing's bounding box
            dpi, bbox = fig.get_dpi(), d.get_bbox()
            figsize = (dpi * bbox.xmax // 4, (dpi / 2) * (bbox.ymax - bbox.ymin))

        d.draw(canvas=plt.gca(), showframe=False, show=False)
        plt.axis("off")

        BasePlot._fig.used_models.extend(models)
        return self._plot(
            ax=plt.gca(),
            title=title,
            legend=legend,
            figsize=figsize,
            plotname="plot_pipeline",
            filename=filename,
            display=display,
        )
+
+ @available_if(has_task(["binary", "multilabel"]))
+ @composed(crash, plot_from_model)
+ def plot_prc(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ dataset: str | SEQUENCE = "test",
+ target: INT | str = 0,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower left",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the precision-recall curve.
+
+ Read more about [PRC][] in sklearn's documentation. Only
+ available for binary classification tasks.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ dataset: str or sequence, default="test"
+ Data set on which to calculate the metric. Use a sequence
+ or add `+` between options to select more than one. Choose
+ from: "train", "test" or "holdout".
+
+ target: int or str, default=0
+ Target column to look at. Only for [multilabel][] tasks.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower left"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_det
+ atom.plots:PredictionPlot.plot_lift
+ atom.plots:PredictionPlot.plot_roc
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import make_classification
+
+ X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(["LR", "RF"])
+ atom.plot_prc()
+ ```
+
+ """
+ dataset = self._get_set(dataset, max_one=False)
+ target = self.branch._get_target(target, only_columns=True)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+ for m in models:
+ for ds in dataset:
+ y_true, y_pred = m._get_pred(ds, target, attr="thresh")
+
+ # Get precision-recall pairs for different thresholds
+ prec, rec, _ = precision_recall_curve(y_true, y_pred)
+
+ fig.add_trace(
+ self._draw_line(
+ x=rec,
+ y=prec,
+ mode="lines",
+ parent=m.name,
+ child=ds,
+ legend=legend,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ self._draw_straight_line(sum(m.y_test) / len(m.y_test), xaxis=xaxis, yaxis=yaxis)
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Recall",
+ ylabel="Precision",
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_prc",
+ filename=filename,
+ display=display,
+ )
+
+ @available_if(has_task("class"))
+ @composed(crash, plot_from_model)
+ def plot_probabilities(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ dataset: str = "test",
+ target: INT | str | tuple = 1,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "upper right",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the probability distribution of the target classes.
+
+ This plot is available only for models with a `predict_proba`
+ method in classification tasks.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ dataset: str, default="test"
+ Data set on which to calculate the metric. Choose from:
+ "train", "test" or "holdout".
+
+ target: int, str or tuple, default=1
+ Probability of being that class in the target column. For
+ multioutput tasks, the value should be a tuple of the form
+ (column, class).
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="upper right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_confusion_matrix
+ atom.plots:PredictionPlot.plot_results
+ atom.plots:PredictionPlot.plot_threshold
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import make_classification
+
+ X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(["LR", "RF"])
+ atom.plot_probabilities()
+ ```
+
+ """
+ check_predict_proba(models, "plot_probabilities")
+ ds = self._get_set(dataset, max_one=True)
+ col, cls = self.branch._get_target(target)
+ col = lst(self.target)[col]
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+ for m in models:
+ y_true, y_pred = getattr(m, f"y_{ds}"), getattr(m, f"predict_proba_{ds}")
+ for value in np.unique(m.dataset[col]):
+ # Get indices per class
+ if is_multioutput(self.task):
+ if self.task.startswith("multilabel"):
+ hist = y_pred.loc[y_true[col] == value, col]
+ else:
+ hist = y_pred.loc[cls, col].loc[y_true[col] == value]
+ else:
+ hist = y_pred.loc[y_true == value, str(cls)]
+
+ fig.add_trace(
+ go.Scatter(
+ x=(x := np.linspace(0, 1, 100)),
+ y=stats.gaussian_kde(hist)(x),
+ mode="lines",
+ line=dict(
+ width=2,
+ color=BasePlot._fig.get_elem(m.name),
+ dash=BasePlot._fig.get_elem(ds, "dash"),
+ ),
+ fill="tonexty",
+ fillcolor=f"rgba{BasePlot._fig.get_elem(m.name)[3:-1]}, 0.2)",
+ fillpattern=dict(shape=BasePlot._fig.get_elem(value, "shape")),
+ name=f"{col}={value}",
+ legendgroup=m.name,
+ legendgrouptitle=dict(text=m.name, font_size=self.label_fontsize),
+ showlegend=BasePlot._fig.showlegend(f"{m.name}-{value}", legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ groupclick="toggleitem",
+ xlabel="Probability",
+ ylabel="Probability density",
+ xlim=(0, 1),
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_probabilities",
+ filename=filename,
+ display=display,
+ )
+
+ @available_if(has_task("reg"))
+ @composed(crash, plot_from_model)
+ def plot_residuals(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ dataset: str = "test",
+ target: INT | str = 0,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "upper left",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot a model's residuals.
+
+ The plot shows the residuals (difference between the predicted
+ and the true value) on the vertical axis and the independent
+ variable on the horizontal axis. The gray, intersected line
+ shows the identity line. This plot can be useful to analyze the
+ variance of the error of the regressor. If the points are
+ randomly dispersed around the horizontal axis, a linear
+ regression model is appropriate for the data; otherwise, a
+ non-linear model is more appropriate. This plot is only
+ available for regression tasks.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ dataset: str, default="test"
+ Data set on which to calculate the metric. Choose from:
+ "train", "test" or "holdout".
+
+ target: int or str, default=0
+ Target column to look at. Only for [multioutput tasks][].
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="upper left"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_errors
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMRegressor
+ from sklearn.datasets import load_diabetes
+
+ X, y = load_diabetes(return_X_y=True, as_frame=True)
+
+ atom = ATOMRegressor(X, y)
+ atom.run(["OLS", "LGB"])
+ atom.plot_residuals()
+ ```
+
+ """
+ ds = self._get_set(dataset, max_one=True)
+ target = self.branch._get_target(target, only_columns=True)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes(x=(0, 0.69))
+ xaxis2, yaxis2 = BasePlot._fig.get_axes(x=(0.71, 1.0))
+ for m in models:
+ y_true, y_pred = m._get_pred(ds, target)
+
+ fig.add_trace(
+ go.Scatter(
+ x=y_true,
+ y=(res := np.subtract(y_true, y_pred)),
+ mode="markers",
+ line=dict(width=2, color=BasePlot._fig.get_elem(m.name)),
+ name=m.name,
+ legendgroup=m.name,
+ showlegend=BasePlot._fig.showlegend(m.name, legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ fig.add_trace(
+ go.Histogram(
+ y=res,
+ bingroup="residuals",
+ marker=dict(
+ color=f"rgba({BasePlot._fig.get_elem(m.name)[4:-1]}, 0.2)",
+ line=dict(width=2, color=BasePlot._fig.get_elem(m.name)),
+ ),
+ name=m.name,
+ legendgroup=m.name,
+ showlegend=False,
+ xaxis=xaxis2,
+ yaxis=yaxis,
+ )
+ )
+
+ self._draw_straight_line(y=0, xaxis=xaxis, yaxis=yaxis)
+
+ fig.update_layout({f"yaxis{xaxis[1:]}_showgrid": True, "barmode": "overlay"})
+
+ self._plot(
+ ax=(f"xaxis{xaxis2[1:]}", f"yaxis{yaxis2[1:]}"),
+ xlabel="Distribution",
+ title=title,
+ )
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ groupclick="togglegroup",
+ ylabel="Residuals",
+ xlabel="True value",
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_residuals",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model)
+ def plot_results(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ metric: INT | str | SEQUENCE | None = None,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower right",
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the model results.
+
+ If all models applied bootstrap, the plot is a boxplot. If
+ not, the plot is a barplot. Models are ordered based on
+ their score from the top down. The score is either the
+ `score_bootstrap` or `score_test` attribute of the model,
+ selected in that order.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ metric: int, str, sequence or None, default=None
+ Metric to plot (only for multi-metric runs). Other available
+ options are "time_bo", "time_fit", "time_bootstrap" and
+ "time". If str, add `+` between options to select more than
+ one. If None, the metric used to run the pipeline is selected.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the number of models.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_confusion_matrix
+ atom.plots:PredictionPlot.plot_probabilities
+ atom.plots:PredictionPlot.plot_threshold
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import make_classification
+
+ X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(["GNB", "LR", "RF", "LGB"], metric=["f1", "recall"])
+ atom.plot_results()
+
+ atom.run(["GNB", "LR", "RF", "LGB"], metric=["f1", "recall"], n_bootstrap=5)
+ atom.plot_results()
+ atom.plot_results(metric="time_fit+time")
+ ```
+
+ """
+
+ def get_std(model: MODEL, metric: int) -> SCALAR:
+ """Get the standard deviation of the bootstrap scores.
+
+ Parameters
+ ----------
+ model: Model
+ Model to get the std from.
+
+ metric: int
+ Index of the metric to get it from.
+
+ Returns
+ -------
+ int or float
+ Standard deviation score or 0 if not bootstrapped.
+
+ """
+ if model.bootstrap is None:
+ return 0
+ else:
+ return model.bootstrap.iloc[:, metric].std()
+
+ metric = self._get_metric(metric, max_one=False)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+
+ for met in metric:
+ if isinstance(met, str):
+ color = BasePlot._fig.get_elem(met)
+ fig.add_trace(
+ go.Bar(
+ x=[getattr(m, met) for m in models],
+ y=[m.name for m in models],
+ orientation="h",
+ marker=dict(
+ color=f"rgba({color[4:-1]}, 0.2)",
+ line=dict(width=2, color=color),
+ ),
+ hovertemplate=f"%{{x}}<extra>{met}</extra>",
+ name=met,
+ legendgroup=met,
+ showlegend=BasePlot._fig.showlegend(met, legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+ else:
+ name = self._metric[met].name
+ color = BasePlot._fig.get_elem()
+
+ if all(m.score_bootstrap for m in models):
+ x = np.array([m.bootstrap.iloc[:, met] for m in models]).ravel()
+ y = np.array([[m.name] * len(m.bootstrap) for m in models]).ravel()
+ fig.add_trace(
+ go.Box(
+ x=x,
+ y=list(y),
+ marker_color=color,
+ boxpoints="outliers",
+ orientation="h",
+ name=name,
+ legendgroup=name,
+ showlegend=BasePlot._fig.showlegend(name, legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+ else:
+ fig.add_trace(
+ go.Bar(
+ x=[get_best_score(m, met) for m in models],
+ y=[m.name for m in models],
+ error_x=dict(
+ type="data",
+ array=[get_std(m, met) for m in models],
+ ),
+ orientation="h",
+ marker=dict(
+ color=f"rgba({color[4:-1]}, 0.2)",
+ line=dict(width=2, color=color),
+ ),
+ hovertemplate="%{x}",
+ name=name,
+ legendgroup=name,
+ showlegend=BasePlot._fig.showlegend(name, legend),
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ fig.update_layout(
+ {
+ f"yaxis{yaxis[1:]}": dict(categoryorder="total ascending"),
+ "bargroupgap": 0.05,
+ "boxmode": "group",
+ }
+ )
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="time (s)" if all(isinstance(m, str) for m in metric) else "Score",
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 400 + len(models) * 50),
+ plotname="plot_results",
+ filename=filename,
+ display=display,
+ )
+
+ @available_if(has_task(["binary", "multilabel"]))
+ @composed(crash, plot_from_model)
+ def plot_roc(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ dataset: str | SEQUENCE = "test",
+ target: INT | str = 0,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower right",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot the Receiver Operating Characteristics curve.
+
+ Read more about [ROC][] in sklearn's documentation. Only
+ available for classification tasks.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ dataset: str or sequence, default="test"
+ Data set on which to calculate the metric. Use a sequence
+ or add `+` between options to select more than one. Choose
+ from: "train", "test" or "holdout".
+
+ target: int or str, default=0
+ Target column to look at. Only for [multilabel][] tasks.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_gains
+ atom.plots:PredictionPlot.plot_lift
+ atom.plots:PredictionPlot.plot_prc
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import make_classification
+
+ X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(["LR", "RF"])
+ atom.plot_roc()
+ ```
+
+ """
+ dataset = self._get_set(dataset, max_one=False)
+ target = self.branch._get_target(target, only_columns=True)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+ for m in models:
+ for ds in dataset:
+ # Get False (True) Positive Rate as arrays
+ fpr, tpr, _ = roc_curve(*m._get_pred(ds, target, attr="thresh"))
+
+ fig.add_trace(
+ self._draw_line(
+ x=fpr,
+ y=tpr,
+ mode="lines",
+ parent=m.name,
+ child=ds,
+ legend=legend,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ self._draw_straight_line(y="diagonal", xaxis=xaxis, yaxis=yaxis)
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlim=(-0.03, 1.03),
+ ylim=(-0.03, 1.03),
+ xlabel="FPR",
+ ylabel="TPR",
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_roc",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model(ensembles=False))
+ def plot_successive_halving(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ metric: INT | str | SEQUENCE | None = None,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower right",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot scores per iteration of the successive halving.
+
+ Only use with models fitted using [successive halving][].
+ [Ensembles][] are ignored.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ metric: int, str, sequence or None, default=None
+ Metric to plot (only for multi-metric runs). Use a sequence
+ or add `+` between options to select more than one. If None,
+ the metric used to run the pipeline is selected.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower right"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_learning_curve
+ atom.plots:PredictionPlot.plot_results
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.successive_halving(["Tree", "Bag", "RF", "LGB"], n_bootstrap=5)
+ atom.plot_successive_halving()
+ ```
+
+ """
+ metric = self._get_metric(metric, max_one=False)
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+
+ for met in metric:
+ x, y, std = defaultdict(list), defaultdict(list), defaultdict(list)
+ for m in models:
+ x[m._group].append(len(m.branch._idx[1]) // m._train_idx)
+ y[m._group].append(get_best_score(m, met))
+ if m.bootstrap is not None:
+ std[m._group].append(m.bootstrap.iloc[:, met].std())
+
+ for group in x:
+ fig.add_trace(
+ self._draw_line(
+ x=x[group],
+ y=y[group],
+ mode="lines+markers",
+ marker_symbol="circle",
+ error_y=dict(type="data", array=std[group], visible=True),
+ parent=group,
+ child=self._metric[met].name,
+ legend=legend,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ # Add error bands
+ if m.bootstrap is not None:
+ fillcolor = f"rgba{BasePlot._fig.get_elem(group)[3:-1]}, 0.2)"
+ fig.add_traces(
+ [
+ go.Scatter(
+ x=x[group],
+ y=np.add(y[group], std[group]),
+ mode="lines",
+ line=dict(width=1, color=BasePlot._fig.get_elem(group)),
+ hovertemplate="%{y}<extra>upper bound</extra>",
+ legendgroup=group,
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ ),
+ go.Scatter(
+ x=x[group],
+ y=np.subtract(y[group], std[group]),
+ mode="lines",
+ line=dict(width=1, color=BasePlot._fig.get_elem(group)),
+ fill="tonexty",
+ fillcolor=fillcolor,
+ hovertemplate="%{y}<extra>lower bound</extra>",
+ legendgroup=group,
+ showlegend=False,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ ),
+ ]
+ )
+
+ fig.update_layout({f"xaxis{yaxis[1:]}": dict(dtick=1, autorange="reversed")})
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ groupclick="togglegroup",
+ title=title,
+ legend=legend,
+ xlabel="n_models",
+ ylabel="Score",
+ figsize=figsize,
+ plotname="plot_successive_halving",
+ filename=filename,
+ display=display,
+ )
+
+ @available_if(has_task(["binary", "multilabel"]))
+ @composed(crash, plot_from_model)
+ def plot_threshold(
+ self,
+ models: INT | str | MODEL | slice | SEQUENCE | None = None,
+ metric: METRIC_SELECTOR = None,
+ dataset: str = "test",
+ target: INT | str = 0,
+ steps: INT = 100,
+ *,
+ title: str | dict | None = None,
+ legend: str | dict | None = "lower left",
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> go.Figure | None:
+ """Plot metric performances against threshold values.
+
+ This plot is available only for models with a `predict_proba`
+ method in a binary or [multilabel][] classification task.
+
+ Parameters
+ ----------
+ models: int, str, Model, slice, sequence or None, default=None
+ Models to plot. If None, all models are selected.
+
+ metric: str, func, scorer, sequence or None, default=None
+ Metric to plot. Choose from any of sklearn's scorers, a
+ function with signature `metric(y_true, y_pred)`, a scorer
+ object or a sequence of these. Use a sequence or add `+`
+ between options to select more than one. If None, the
+ metric used to run the pipeline is selected.
+
+ dataset: str, default="test"
+ Data set on which to calculate the metric. Choose from:
+ "train", "test" or "holdout".
+
+ target: int or str, default=0
+ Target column to look at. Only for [multilabel][] tasks.
+
+ steps: int, default=100
+ Number of thresholds measured.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default="lower left"
+ Legend for the plot. See the [user guide][parameters] for
+ an extended description of the choices.
+
+ - If None: No legend is shown.
+ - If str: Location where to show the legend.
+ - If dict: Legend configuration.
+
+ figsize: tuple, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as html. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [go.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_calibration
+ atom.plots:PredictionPlot.plot_confusion_matrix
+ atom.plots:PredictionPlot.plot_probabilities
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import make_classification
+
+ X, y = make_classification(n_samples=1000, flip_y=0.2, random_state=1)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run(["LR", "RF"])
+ atom.plot_threshold()
+ ```
+
+ """
+ check_predict_proba(models, "plot_threshold")
+ ds = self._get_set(dataset, max_one=True)
+ target = self.branch._get_target(target, only_columns=True)
+
+ # Get all metric functions from the input
+ if metric is None:
+ metrics = [m._score_func for m in self._metric]
+ else:
+ metrics = []
+ for m in lst(metric):
+ if isinstance(m, str):
+ metrics.extend(m.split("+"))
+ else:
+ metrics.append(m)
+ metrics = [get_custom_scorer(m)._score_func for m in metrics]
+
+ fig = self._get_figure()
+ xaxis, yaxis = BasePlot._fig.get_axes()
+
+ steps = np.linspace(0, 1, steps)
+ for m in models:
+ y_true, y_pred = m._get_pred(ds, target, attr="predict_proba")
+ for met in metrics:
+ fig.add_trace(
+ self._draw_line(
+ x=steps,
+ y=[met(y_true, y_pred >= step) for step in steps],
+ parent=m.name,
+ child=met.__name__,
+ legend=legend,
+ xaxis=xaxis,
+ yaxis=yaxis,
+ )
+ )
+
+ BasePlot._fig.used_models.extend(models)
+ return self._plot(
+ ax=(f"xaxis{xaxis[1:]}", f"yaxis{yaxis[1:]}"),
+ xlabel="Threshold",
+ ylabel="Score",
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_threshold",
+ filename=filename,
+ display=display,
+ )
diff --git a/atom/plots/shapplot.py b/atom/plots/shapplot.py
new file mode 100644
index 000000000..5c366454c
--- /dev/null
+++ b/atom/plots/shapplot.py
@@ -0,0 +1,866 @@
+# -*- coding: utf-8 -*-
+
+"""
+Automated Tool for Optimized Modelling (ATOM)
+Author: Mavs
+Description: Module containing the ShapPlot class.
+
+"""
+
+from __future__ import annotations
+
+from importlib.util import find_spec
+
+import matplotlib.pyplot as plt
+import shap
+from typeguard import typechecked
+
+from atom.plots.base import BasePlot
+from atom.utils.types import INT, LEGEND, MODEL, SEQUENCE, SLICE
+from atom.utils.utils import check_canvas, composed, crash, plot_from_model
+
+
+@typechecked
+class ShapPlot(BasePlot):
+ """Shap plots.
+
+ ATOM wrapper for plots made by the shap package, using Shapley
+ values for model interpretation. These plots are accessible from
+ the runners or from the models. Only one model can be plotted at
+ the same time since the plots are not made by ATOM.
+
+ """
+
+ @composed(crash, plot_from_model(max_one=True))
+ def plot_shap_bar(
+ self,
+ models: INT | str | MODEL | None = None,
+ index: SLICE | None = None,
+ show: INT | None = None,
+ target: INT | str | tuple = 1,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> plt.Figure | None:
+ """Plot SHAP's bar plot.
+
+ Create a bar plot of a set of SHAP values. If a single sample
+ is passed, then the SHAP values are plotted. If many samples
+ are passed, then the mean absolute value for each feature
+ column is plotted. Read more about SHAP plots in the
+ [user guide][shap].
+
+ Parameters
+ ----------
+ models: int, str, Model or None, default=None
+ Model to plot. If None, all models are selected. Note that
+ leaving the default option could raise an exception if there
+ are multiple models. To avoid this, call the plot directly
+ from a model, e.g. `atom.lr.plot_shap_bar()`.
+
+ index: int, str, slice, sequence or None, default=None
+ Rows in the dataset to plot. If None, it selects all rows
+ in the test set.
+
+ show: int or None, default=None
+ Number of features (ordered by importance) to show. If
+ None, it shows all features.
+
+ target: int, str or tuple, default=1
+ Class in the target column to target. For multioutput tasks,
+ the value should be a tuple of the form (column, class).
+ Note that for binary and multilabel tasks, the selected
+ class is always the positive one.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the number of features shown.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as png. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [plt.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_parshap
+ atom.plots:ShapPlot.plot_shap_beeswarm
+ atom.plots:ShapPlot.plot_shap_scatter
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run("LR")
+ atom.plot_shap_bar(show=10)
+ ```
+
+ """
+ rows = models.X.loc[models.branch._get_rows(index)]
+ show = self._get_show(show, models)
+ target = self.branch._get_target(target)
+ explanation = models._shap.get_explanation(rows, target)
+
+ self._get_figure(backend="matplotlib")
+ check_canvas(BasePlot._fig.is_canvas, "plot_shap_bar")
+
+ shap.plots.bar(explanation, max_display=show, show=False)
+
+ BasePlot._fig.used_models.append(models)
+ return self._plot(
+ ax=plt.gca(),
+ xlabel=plt.gca().get_xlabel(),
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 400 + show * 50),
+ plotname="plot_shap_bar",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model(max_one=True))
+ def plot_shap_beeswarm(
+ self,
+ models: INT | str | MODEL | None = None,
+ index: slice | SEQUENCE | None = None,
+ show: INT | None = None,
+ target: INT | str | tuple = 1,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> plt.Figure | None:
+ """Plot SHAP's beeswarm plot.
+
+ The plot is colored by feature values. Read more about SHAP
+ plots in the [user guide][shap].
+
+ Parameters
+ ----------
+ models: int, str, Model or None, default=None
+ Model to plot. If None, all models are selected. Note that
+ leaving the default option could raise an exception if there
+ are multiple models. To avoid this, call the plot directly
+ from a model, e.g. `atom.lr.plot_shap_beeswarm()`.
+
+ index: slice, sequence or None, default=None
+ Rows in the dataset to plot. If None, it selects all rows
+ in the test set. The beeswarm plot does not support plotting
+ a single sample.
+
+ show: int or None, default=None
+ Number of features (ordered by importance) to show. If
+ None, it shows all features.
+
+ target: int, str or tuple, default=1
+ Class in the target column to target. For multioutput tasks,
+ the value should be a tuple of the form (column, class).
+ Note that for binary and multilabel tasks, the selected
+ class is always the positive one.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the number of features shown.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as png. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [plt.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:PredictionPlot.plot_parshap
+ atom.plots:ShapPlot.plot_shap_bar
+ atom.plots:ShapPlot.plot_shap_scatter
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run("LR")
+ atom.plot_shap_beeswarm(show=10)
+ ```
+
+ """
+ rows = models.X.loc[models.branch._get_rows(index)]
+ show = self._get_show(show, models)
+ target = self.branch._get_target(target)
+ explanation = models._shap.get_explanation(rows, target)
+
+ self._get_figure(backend="matplotlib")
+ check_canvas(BasePlot._fig.is_canvas, "plot_shap_beeswarm")
+
+ shap.plots.beeswarm(explanation, max_display=show, show=False)
+
+ BasePlot._fig.used_models.append(models)
+ return self._plot(
+ ax=plt.gca(),
+ xlabel=plt.gca().get_xlabel(),
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 400 + show * 50),
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model(max_one=True))
+ def plot_shap_decision(
+ self,
+ models: INT | str | MODEL | None = None,
+ index: SLICE | None = None,
+ show: INT | None = None,
+ target: INT | str | tuple = 1,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> plt.Figure | None:
+ """Plot SHAP's decision plot.
+
+ Visualize model decisions using cumulative SHAP values. Each
+ plotted line explains a single model prediction. If a single
+ prediction is plotted, feature values are printed in the
+ plot (if supplied). If multiple predictions are plotted
+ together, feature values will not be printed. Plotting too
+ many predictions together will make the plot unintelligible.
+ Read more about SHAP plots in the [user guide][shap].
+
+ Parameters
+ ----------
+ models: int, str, Model or None, default=None
+ Model to plot. If None, all models are selected. Note that
+ leaving the default option could raise an exception if there
+ are multiple models. To avoid this, call the plot directly
+ from a model, e.g. `atom.lr.plot_shap_decision()`.
+
+ index: int, str, slice, sequence or None, default=None
+ Rows in the dataset to plot. If None, it selects all rows
+ in the test set.
+
+ show: int or None, default=None
+ Number of features (ordered by importance) to show. If
+ None, it shows all features.
+
+ target: int, str or tuple, default=1
+ Class in the target column to target. For multioutput tasks,
+ the value should be a tuple of the form (column, class).
+ Note that for binary and multilabel tasks, the selected
+ class is always the positive one.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the number of features shown.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as png. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [plt.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:ShapPlot.plot_shap_bar
+ atom.plots:ShapPlot.plot_shap_beeswarm
+ atom.plots:ShapPlot.plot_shap_force
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run("LR")
+ atom.plot_shap_decision(show=10)
+ atom.plot_shap_decision(index=-1, show=10)
+ ```
+
+ """
+ rows = models.X.loc[models.branch._get_rows(index)]
+ show = self._get_show(show, models)
+ target = self.branch._get_target(target)
+ explanation = models._shap.get_explanation(rows, target)
+
+ self._get_figure(backend="matplotlib")
+ check_canvas(BasePlot._fig.is_canvas, "plot_shap_decision")
+
+ shap.decision_plot(
+ base_value=explanation.base_values,
+ shap_values=explanation.values,
+ features=rows,
+ feature_display_range=slice(-1, -show - 1, -1),
+ auto_size_plot=False,
+ show=False,
+ )
+
+ BasePlot._fig.used_models.append(models)
+ return self._plot(
+ ax=plt.gca(),
+ xlabel=plt.gca().get_xlabel(),
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 400 + show * 50),
+ plotname="plot_shap_decision",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model(max_one=True))
+ def plot_shap_force(
+ self,
+ models: INT | str | MODEL | None = None,
+ index: SLICE | None = None,
+ target: INT | str | tuple = 1,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] = (900, 300),
+ filename: str | None = None,
+ display: bool | None = True,
+ **kwargs,
+ ) -> plt.Figure | None:
+ """Plot SHAP's force plot.
+
+ Visualize the given SHAP values with an additive force layout.
+ Note that by default this plot will render using javascript.
+ For a regular figure use `matplotlib=True` (this option is
+ only available when only a single sample is plotted). Read more
+ about SHAP plots in the [user guide][shap].
+
+ Parameters
+ ----------
+ models: int, str, Model or None, default=None
+ Model to plot. If None, all models are selected. Note that
+ leaving the default option could raise an exception if there
+ are multiple models. To avoid this, call the plot directly
+ from a model, e.g. `atom.lr.plot_shap_force()`.
+
+ index: int, str, slice, sequence or None, default=None
+ Rows in the dataset to plot. If None, it selects all rows
+ in the test set.
+
+ target: int, str or tuple, default=1
+ Class in the target column to target. For multioutput tasks,
+ the value should be a tuple of the form (column, class).
+ Note that for binary and multilabel tasks, the selected
+ class is always the positive one.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple or None, default=(900, 300)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as png. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ **kwargs
+ Additional keyword arguments for [shap.plots.force][force].
+
+ Returns
+ -------
+ [plt.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:ShapPlot.plot_shap_beeswarm
+ atom.plots:ShapPlot.plot_shap_scatter
+ atom.plots:ShapPlot.plot_shap_decision
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run("LR")
+ atom.plot_shap_force(index=-2, matplotlib=True, figsize=(1800, 300))
+ ```
+
+ """
+ rows = models.X.loc[models.branch._get_rows(index)]
+ target = self.branch._get_target(target)
+ explanation = models._shap.get_explanation(rows, target)
+
+ self._get_figure(create_figure=False, backend="matplotlib")
+ check_canvas(BasePlot._fig.is_canvas, "plot_shap_force")
+
+ plot = shap.force_plot(
+ base_value=explanation.base_values,
+ shap_values=explanation.values,
+ features=rows,
+ show=False,
+ **kwargs,
+ )
+
+ if kwargs.get("matplotlib"):
+ BasePlot._fig.used_models.append(models)
+ return self._plot(
+ fig=plt.gcf(),
+ ax=plt.gca(),
+ title=title,
+ legend=legend,
+ figsize=figsize,
+ plotname="plot_shap_force",
+ filename=filename,
+ display=display,
+ )
+ else:
+ if filename: # Save to a html file
+ if not filename.endswith(".html"):
+ filename += ".html"
+ shap.save_html(filename, plot)
+ if display and find_spec("IPython"):
+ from IPython.display import display
+
+ shap.initjs()
+ display(plot)
+
+ @composed(crash, plot_from_model(max_one=True))
+ def plot_shap_heatmap(
+ self,
+ models: INT | str | MODEL | None = None,
+ index: slice | SEQUENCE | None = None,
+ show: INT | None = None,
+ target: INT | str | tuple = 1,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> plt.Figure | None:
+ """Plot SHAP's heatmap plot.
+
+ This plot is designed to show the population substructure of a
+ dataset using supervised clustering and a heatmap. Supervised
+ clustering involves clustering data points not by their original
+ feature values but by their explanations. Read more about SHAP
+ plots in the [user guide][shap].
+
+ Parameters
+ ----------
+ models: int, str, Model or None, default=None
+ Model to plot. If None, all models are selected. Note that
+ leaving the default option could raise an exception if there
+ are multiple models. To avoid this, call the plot directly
+ from a model, e.g. `atom.lr.plot_shap_heatmap()`.
+
+ index: slice, sequence or None, default=None
+ Rows in the dataset to plot. If None, it selects all rows
+ in the test set. The plot_shap_heatmap method does not
+ support plotting a single sample.
+
+ show: int or None, default=None
+ Number of features (ordered by importance) to show. If
+ None, it shows all features.
+
+ target: int, str or tuple, default=1
+ Class in the target column to target. For multioutput tasks,
+ the value should be a tuple of the form (column, class).
+ Note that for binary and multilabel tasks, the selected
+ class is always the positive one.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the number of features shown.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as png. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [plt.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:ShapPlot.plot_shap_decision
+ atom.plots:ShapPlot.plot_shap_force
+ atom.plots:ShapPlot.plot_shap_waterfall
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run("LR")
+ atom.plot_shap_heatmap(show=10)
+ ```
+
+ """
+ rows = models.X.loc[models.branch._get_rows(index)]
+ show = self._get_show(show, models)
+ target = self.branch._get_target(target)
+ explanation = models._shap.get_explanation(rows, target)
+
+ self._get_figure(backend="matplotlib")
+ check_canvas(BasePlot._fig.is_canvas, "plot_shap_heatmap")
+
+ shap.plots.heatmap(explanation, max_display=show, show=False)
+
+ BasePlot._fig.used_models.append(models)
+ return self._plot(
+ ax=plt.gca(),
+ xlabel=plt.gca().get_xlabel(),
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 400 + show * 50),
+ plotname="plot_shap_heatmap",
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model(max_one=True))
+ def plot_shap_scatter(
+ self,
+ models: INT | str | MODEL | None = None,
+ index: slice | SEQUENCE | None = None,
+ columns: INT | str = 0,
+ target: INT | str | tuple = 1,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] = (900, 600),
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> plt.Figure | None:
+ """Plot SHAP's scatter plot.
+
+ Plots the value of the feature on the x-axis and the SHAP value
+ of the same feature on the y-axis. This shows how the model
+ depends on the given feature, and is like a richer extension of
+ the classical partial dependence plots. Vertical dispersion of
+ the data points represents interaction effects. Read more about
+ SHAP plots in the [user guide][shap].
+
+ Parameters
+ ----------
+ models: int, str, Model or None, default=None
+ Model to plot. If None, all models are selected. Note that
+ leaving the default option could raise an exception if there
+ are multiple models. To avoid this, call the plot directly
+ from a model, e.g. `atom.lr.plot_shap_scatter()`.
+
+ index: slice, sequence or None, default=None
+ Rows in the dataset to plot. If None, it selects all rows
+ in the test set. The plot_shap_scatter method does not
+ support plotting a single sample.
+
+ columns: int or str, default=0
+ Column to plot.
+
+ target: int, str or tuple, default=1
+ Class in the target column to target. For multioutput tasks,
+ the value should be a tuple of the form (column, class).
+ Note that for binary and multilabel tasks, the selected
+ class is always the positive one.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple or None, default=(900, 600)
+ Figure's size in pixels, format as (x, y).
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as png. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [plt.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:ShapPlot.plot_shap_beeswarm
+ atom.plots:ShapPlot.plot_shap_decision
+ atom.plots:ShapPlot.plot_shap_force
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run("LR")
+ atom.plot_shap_scatter(columns="symmetry error")
+ ```
+
+ """
+ rows = models.X.loc[models.branch._get_rows(index)]
+ column = models.branch._get_columns(columns, include_target=False)[0]
+ target = self.branch._get_target(target)
+ explanation = models._shap.get_explanation(rows, target)
+
+ # Get explanation for a specific column
+ explanation = explanation[:, models.columns.get_loc(column)]
+
+ self._get_figure(backend="matplotlib")
+ check_canvas(BasePlot._fig.is_canvas, "plot_shap_scatter")
+
+ shap.plots.scatter(explanation, color=explanation, ax=plt.gca(), show=False)
+
+ BasePlot._fig.used_models.append(models)
+ return self._plot(
+ ax=plt.gca(),
+ xlabel=plt.gca().get_xlabel(),
+ ylabel=plt.gca().get_ylabel(),
+ title=title,
+ legend=legend,
+ plotname="plot_shap_scatter",
+ figsize=figsize,
+ filename=filename,
+ display=display,
+ )
+
+ @composed(crash, plot_from_model(max_one=True))
+ def plot_shap_waterfall(
+ self,
+ models: INT | str | MODEL | None = None,
+ index: INT | str | None = None,
+ show: INT | None = None,
+ target: INT | str | tuple = 1,
+ *,
+ title: str | dict | None = None,
+ legend: LEGEND | dict | None = None,
+ figsize: tuple[INT, INT] | None = None,
+ filename: str | None = None,
+ display: bool | None = True,
+ ) -> plt.Figure | None:
+ """Plot SHAP's waterfall plot.
+
+ The SHAP value of a feature represents the impact of the
+ evidence provided by that feature on the model’s output. The
+ waterfall plot is designed to visually display how the SHAP
+ values (evidence) of each feature move the model output from
+ our prior expectation under the background data distribution,
+ to the final model prediction given the evidence of all the
+ features. Features are sorted by the magnitude of their SHAP
+ values with the smallest magnitude features grouped together
+ at the bottom of the plot when the number of features in the
+        model exceeds the `show` parameter. Read more about SHAP plots
+ in the [user guide][shap].
+
+ Parameters
+ ----------
+ models: int, str, Model or None, default=None
+ Model to plot. If None, all models are selected. Note that
+ leaving the default option could raise an exception if there
+ are multiple models. To avoid this, call the plot directly
+ from a model, e.g. `atom.lr.plot_shap_waterfall()`.
+
+ index: int, str or None, default=None
+ Rows in the dataset to plot. If None, it selects all rows
+ in the test set. The plot_shap_waterfall method does not
+ support plotting multiple samples.
+
+ show: int or None, default=None
+ Number of features (ordered by importance) to show. If
+ None, it shows all features.
+
+ target: int, str or tuple, default=1
+ Class in the target column to target. For multioutput tasks,
+ the value should be a tuple of the form (column, class).
+ Note that for binary and multilabel tasks, the selected
+ class is always the positive one.
+
+ title: str, dict or None, default=None
+ Title for the plot.
+
+ - If None, no title is shown.
+ - If str, text for the title.
+ - If dict, [title configuration][parameters].
+
+ legend: str, dict or None, default=None
+ Does nothing. Implemented for continuity of the API.
+
+ figsize: tuple or None, default=None
+ Figure's size in pixels, format as (x, y). If None, it
+ adapts the size to the number of features shown.
+
+ filename: str or None, default=None
+ Save the plot using this name. Use "auto" for automatic
+ naming. The type of the file depends on the provided name
+ (.html, .png, .pdf, etc...). If `filename` has no file type,
+ the plot is saved as png. If None, the plot is not saved.
+
+ display: bool or None, default=True
+ Whether to render the plot. If None, it returns the figure.
+
+ Returns
+ -------
+ [plt.Figure][] or None
+ Plot object. Only returned if `display=None`.
+
+ See Also
+ --------
+ atom.plots:ShapPlot.plot_shap_bar
+ atom.plots:ShapPlot.plot_shap_beeswarm
+ atom.plots:ShapPlot.plot_shap_heatmap
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMClassifier
+ from sklearn.datasets import load_breast_cancer
+
+ X, y = load_breast_cancer(return_X_y=True, as_frame=True)
+
+ atom = ATOMClassifier(X, y, random_state=1)
+ atom.run("LR")
+ atom.plot_shap_waterfall(show=10)
+ ```
+
+ """
+ rows = models.X.loc[[models.branch._get_rows(index)[0]]]
+ show = self._get_show(show, models)
+ target = self.branch._get_target(target)
+ explanation = models._shap.get_explanation(rows, target)
+
+ # Waterfall accepts only one row
+ explanation.values = explanation.values[0]
+ explanation.data = explanation.data[0]
+
+ self._get_figure(backend="matplotlib")
+ check_canvas(BasePlot._fig.is_canvas, "plot_shap_waterfall")
+
+ shap.plots.waterfall(explanation, max_display=show, show=False)
+
+ BasePlot._fig.used_models.append(models)
+ return self._plot(
+ ax=plt.gca(),
+ title=title,
+ legend=legend,
+ figsize=figsize or (900, 400 + show * 50),
+ plotname="plot_shap_waterfall",
+ filename=filename,
+ display=display,
+ )
diff --git a/atom/training.py b/atom/training.py
index de5af1989..decf3926a 100644
--- a/atom/training.py
+++ b/atom/training.py
@@ -20,7 +20,8 @@
from atom.basetrainer import BaseTrainer
from atom.utils.types import (
- BOOL, ENGINE, GOAL, INT, INT_TYPES, METRIC_SELECTOR, PREDICTOR, SEQUENCE,
+ BOOL, ENGINE, INT, INT_TYPES, METRIC_SELECTOR, PREDICTOR, SEQUENCE,
+ WARNINGS,
)
from atom.utils.utils import (
ClassMap, composed, crash, get_best_score, infer_task, lst, method_to_log,
@@ -341,7 +342,7 @@ class DirectClassifier(Direct):
- "keep": Keep the model in its state at failure. Note that
this model can break down many other methods after training.
This option is useful to be able to rerun hyperparameter
- optimization after failure without losing previous succesfull
+ optimization after failure without losing previous successful
trials.
n_jobs: int, default=1
@@ -460,12 +461,12 @@ def __init__(
engine: ENGINE = {"data": "numpy", "estimator": "sklearn"},
backend: str = "loky",
verbose: Literal[0, 1, 2] = 0,
- warnings: BOOL | str = False,
+ warnings: BOOL | WARNINGS = False,
logger: str | Logger | None = None,
experiment: str | None = None,
random_state: INT | None = None,
):
- self.goal: GOAL = "class"
+ self.goal = "class"
super().__init__(
models, metric, est_params, n_trials, ht_params, n_bootstrap,
parallel, errors, n_jobs, device, engine, backend, verbose,
@@ -559,7 +560,7 @@ class DirectForecaster(Direct):
- "keep": Keep the model in its state at failure. Note that
this model can break down many other methods after training.
This option is useful to be able to rerun hyperparameter
- optimization after failure without losing previous succesfull
+ optimization after failure without losing previous successful
trials.
n_jobs: int, default=1
@@ -675,12 +676,12 @@ def __init__(
engine: ENGINE = {"data": "numpy", "estimator": "sklearn"},
backend: str = "loky",
verbose: Literal[0, 1, 2] = 0,
- warnings: BOOL | str = False,
+ warnings: BOOL | WARNINGS = False,
logger: str | Logger | None = None,
experiment: str | None = None,
random_state: INT | None = None,
):
- self.goal: GOAL = "fc"
+ self.goal = "fc"
super().__init__(
models, metric, est_params, n_trials, ht_params, n_bootstrap,
parallel, errors, n_jobs, device, engine, backend, verbose, warnings,
@@ -774,7 +775,7 @@ class DirectRegressor(Direct):
- "keep": Keep the model in its state at failure. Note that
this model can break down many other methods after training.
This option is useful to be able to rerun hyperparameter
- optimization after failure without losing previous succesfull
+ optimization after failure without losing previous successful
trials.
n_jobs: int, default=1
@@ -886,19 +887,19 @@ def __init__(
n_trials: INT | dict | SEQUENCE = 0,
ht_params: dict | None = None,
n_bootstrap: INT | dict | SEQUENCE = 0,
- parallel: bool = False,
+ parallel: BOOL = False,
errors: Literal["raise", "skip", "keep"] = "skip",
n_jobs: INT = 1,
device: str = "cpu",
engine: ENGINE = {"data": "numpy", "estimator": "sklearn"},
backend: str = "loky",
verbose: Literal[0, 1, 2] = 0,
- warnings: bool | str = False,
+        warnings: BOOL | WARNINGS = False,
logger: str | Logger | None = None,
experiment: str | None = None,
random_state: INT | None = None,
):
- self.goal: GOAL = "reg"
+ self.goal = "reg"
super().__init__(
models, metric, est_params, n_trials, ht_params, n_bootstrap,
parallel, errors, n_jobs, device, engine, backend, verbose, warnings,
@@ -999,7 +1000,7 @@ class SuccessiveHalvingClassifier(SuccessiveHalving):
- "keep": Keep the model in its state at failure. Note that
this model can break down many other methods after training.
This option is useful to be able to rerun hyperparameter
- optimization after failure without losing previous succesfull
+ optimization after failure without losing previous successful
trials.
n_jobs: int, default=1
@@ -1112,19 +1113,19 @@ def __init__(
n_trials: INT | dict | SEQUENCE = 0,
ht_params: dict | None = None,
n_bootstrap: INT | dict | SEQUENCE = 0,
- parallel: bool = False,
+ parallel: BOOL = False,
errors: Literal["raise", "skip", "keep"] = "skip",
n_jobs: INT = 1,
device: str = "cpu",
engine: ENGINE = {"data": "numpy", "estimator": "sklearn"},
backend: str = "loky",
verbose: Literal[0, 1, 2] = 0,
- warnings: bool | str = False,
+        warnings: BOOL | WARNINGS = False,
logger: str | Logger | None = None,
experiment: str | None = None,
random_state: INT | None = None,
):
- self.goal: GOAL = "class"
+ self.goal = "class"
super().__init__(
models, metric, skip_runs, est_params, n_trials, ht_params,
n_bootstrap, parallel, errors, n_jobs, device, engine, backend,
@@ -1221,7 +1222,7 @@ class SuccessiveHalvingForecaster(SuccessiveHalving):
- "keep": Keep the model in its state at failure. Note that
this model can break down many other methods after training.
This option is useful to be able to rerun hyperparameter
- optimization after failure without losing previous succesfull
+ optimization after failure without losing previous successful
trials.
n_jobs: int, default=1
@@ -1343,7 +1344,7 @@ def __init__(
experiment: str | None = None,
random_state: INT | None = None,
):
- self.goal: GOAL = "fc"
+ self.goal = "fc"
super().__init__(
models, metric, skip_runs, est_params, n_trials, ht_params,
n_bootstrap, parallel, errors, n_jobs, device, engine, backend,
@@ -1440,7 +1441,7 @@ class SuccessiveHalvingRegressor(SuccessiveHalving):
- "keep": Keep the model in its state at failure. Note that
this model can break down many other methods after training.
This option is useful to be able to rerun hyperparameter
- optimization after failure without losing previous succesfull
+ optimization after failure without losing previous successful
trials.
n_jobs: int, default=1
@@ -1565,7 +1566,7 @@ def __init__(
experiment: str | None = None,
random_state: INT | None = None,
):
- self.goal: GOAL = "reg"
+ self.goal = "reg"
super().__init__(
models, metric, skip_runs, est_params, n_trials, ht_params,
n_bootstrap, parallel, errors, n_jobs, device, engine, backend,
@@ -1671,7 +1672,7 @@ class TrainSizingClassifier(TrainSizing):
- "keep": Keep the model in its state at failure. Note that
this model can break down many other methods after training.
This option is useful to be able to rerun hyperparameter
- optimization after failure without losing previous succesfull
+ optimization after failure without losing previous successful
trials.
n_jobs: int, default=1
@@ -1796,7 +1797,7 @@ def __init__(
experiment: str | None = None,
random_state: INT | None = None,
):
- self.goal: GOAL = "class"
+ self.goal = "class"
super().__init__(
models, metric, train_sizes, est_params, n_trials, ht_params,
n_bootstrap, parallel, errors, n_jobs, device, engine, backend,
@@ -1898,7 +1899,7 @@ class TrainSizingForecaster(TrainSizing):
- "keep": Keep the model in its state at failure. Note that
this model can break down many other methods after training.
This option is useful to be able to rerun hyperparameter
- optimization after failure without losing previous succesfull
+ optimization after failure without losing previous successful
trials.
n_jobs: int, default=1
@@ -2020,7 +2021,7 @@ def __init__(
experiment: str | None = None,
random_state: INT | None = None,
):
- self.goal: GOAL = "fc"
+ self.goal = "fc"
super().__init__(
models, metric, train_sizes, est_params, n_trials, ht_params,
n_bootstrap, parallel, errors, n_jobs, device, engine, backend,
@@ -2122,7 +2123,7 @@ class TrainSizingRegressor(TrainSizing):
- "keep": Keep the model in its state at failure. Note that
this model can break down many other methods after training.
This option is useful to be able to rerun hyperparameter
- optimization after failure without losing previous succesfull
+ optimization after failure without losing previous successful
trials.
n_jobs: int, default=1
@@ -2247,7 +2248,7 @@ def __init__(
experiment: str | None = None,
random_state: INT | None = None,
):
- self.goal: GOAL = "reg"
+ self.goal = "reg"
super().__init__(
models, metric, train_sizes, est_params, n_trials, ht_params,
n_bootstrap, parallel, errors, n_jobs, device, engine, backend,
diff --git a/atom/utils/types.py b/atom/utils/types.py
index e124cc5e4..e362619ec 100644
--- a/atom/utils/types.py
+++ b/atom/utils/types.py
@@ -9,7 +9,9 @@
from __future__ import annotations
-from typing import Callable, Literal, Protocol, TypedDict, Union
+from typing import (
+ Callable, Literal, Protocol, TypedDict, Union, runtime_checkable,
+)
import modin.pandas as md
import numpy as np
@@ -55,8 +57,6 @@
FEATURES = Union[iter, dict, list, tuple, np.ndarray, sps.spmatrix, DATAFRAME]
TARGET = Union[INT, str, dict, SEQUENCE, DATAFRAME]
-BACKEND = Literal["loky", "multiprocessing", "threading", "ray"]
-
DATASET = Literal[
"dataset",
"train",
@@ -73,19 +73,40 @@
]
# Selection of rows or columns by name or position
-SLICE = Union[INT | str | slice | SEQUENCE]
+SLICE = Union[INT, str, slice, SEQUENCE]
# Assignment of index or stratify parameter
-INDEX_SELECTOR = Union[bool | INT | str | SEQUENCE]
+INDEX_SELECTOR = Union[bool, INT, str, SEQUENCE]
-# Allowed values for the goal attribute
-GOAL = Literal["class", "reg", "fc"]
+# Types to initialize a metric
+METRIC_SELECTOR = Union[str, Callable[..., SCALAR], SEQUENCE, None]
-# Metric selectors
-METRIC_SELECTOR = Union[str, Callable[..., SCALAR], SEQUENCE | None]
+# Allowed values for BaseTransformer parameter
+BACKEND = Literal["loky", "multiprocessing", "threading", "ray"]
+WARNINGS = Literal["default", "error", "ignore", "always", "module", "once"]
-# Pruning strategies
-PRUNING = Literal["zscore", "iforest", "ee", "lof", "svm", "dbscan", "hdbscan", "optics"]
+# Data cleaning parameters
+STRAT_NUM = Union[SCALAR, Literal["drop", "mean", "median", "knn", "most_frequent"]]
+DISCRETIZER_STRATS = Literal["uniform", "quantile", "kmeans", "custom"]
+PRUNER_STRATS = Literal[
+ "zscore", "iforest", "ee", "lof", "svm", "dbscan", "hdbscan", "optics"
+]
+SCALER_STRATS = Literal["standard", "minmax", "maxabs", "robust"]
+
+
+# Plotting parameters
+LEGEND = Literal[
+ "upper left",
+ "lower left",
+ "upper right",
+ "lower right",
+ "upper center",
+ "lower center",
+ "center left",
+ "center right",
+ "center",
+ "out",
+]
# Classes for type hinting ========================================= >>
@@ -96,28 +117,32 @@ class ENGINE(TypedDict, total=False):
estimator: Literal["sklearn", "sklearnex", "cuml"]
+@runtime_checkable
class SCORER(Protocol):
"""Protocol for all scorers."""
def _score(self, method_caller, clf, X, y, sample_weight=None): ...
+@runtime_checkable
class TRANSFORMER(Protocol):
"""Protocol for all predictors."""
- def fit(self, **params): ...
def transform(self, **params): ...
+@runtime_checkable
class PREDICTOR(Protocol):
"""Protocol for all predictors."""
def fit(self, **params): ...
def predict(self, **params): ...
+@runtime_checkable
class ESTIMATOR(Protocol):
"""Protocol for all estimators."""
def fit(self, **params): ...
+@runtime_checkable
class BRANCH(Protocol):
"""Protocol for the Branch class."""
def _get_rows(self, **params): ...
@@ -125,12 +150,14 @@ def _get_columns(self, **params): ...
def _get_target(self, **params): ...
+@runtime_checkable
class MODEL(Protocol):
"""Protocol for all models."""
- def est_class(self): ...
- def get_estimator(self, **params): ...
+ def _est_class(self): ...
+ def _get_est(self, **params): ...
+@runtime_checkable
class RUNNER(Protocol):
"""Protocol for all runners."""
def run(self, **params): ...
diff --git a/atom/utils/utils.py b/atom/utils/utils.py
index dce01fcb1..9f2fb715e 100644
--- a/atom/utils/utils.py
+++ b/atom/utils/utils.py
@@ -14,7 +14,7 @@
import sys
import tempfile
import warnings
-from collections import OrderedDict, deque
+from collections import deque
from collections.abc import MutableMapping
from contextlib import contextmanager
from copy import copy, deepcopy
@@ -25,10 +25,10 @@
from importlib.util import find_spec
from inspect import Parameter, signature
from itertools import cycle
-from types import GeneratorType
+from types import GeneratorType, MappingProxyType
from typing import Any, Callable
from unittest.mock import patch
-
+from joblib import Memory
import mlflow
import modin.pandas as md
import numpy as np
@@ -54,7 +54,7 @@
-BRANCH, DATAFRAME, DATAFRAME_TYPES, ESTIMATOR, FEATURES, FLOAT,
+BOOL, BRANCH, DATAFRAME, DATAFRAME_TYPES, ESTIMATOR, FEATURES, FLOAT,
INDEX_SELECTOR, INT, INT_TYPES, MODEL, PANDAS, PANDAS_TYPES, PREDICTOR,
SCALAR, SCORER, SEQUENCE, SEQUENCE_TYPES, SERIES, SERIES_TYPES, TARGET,
TRANSFORMER,
)
@@ -118,10 +118,11 @@ def __init__(self, scorer: SCORER, task: str):
self.scorer = scorer
self.task = task
- @staticmethod
- def get_final_error(error: FLOAT, weight: FLOAT) -> FLOAT:
+ def get_final_error(self, error: FLOAT, weight: FLOAT) -> FLOAT:
"""Returns final value of metric based on error and weight.
+ Can't be a `staticmethod` because of CatBoost's implementation.
+
Parameters
----------
error: float
@@ -1253,7 +1254,7 @@ def to_rgb(c: str) -> str:
return c
-def sign(obj: Callable) -> OrderedDict:
+def sign(obj: Callable) -> MappingProxyType:
"""Get the parameters of an object.
Parameters
@@ -1263,7 +1264,7 @@ def sign(obj: Callable) -> OrderedDict:
Returns
-------
- OrderedDict
+ mappingproxy
Object's parameters.
"""
@@ -1315,7 +1316,7 @@ def get_cols(elem: PANDAS) -> list[SERIES]:
def variable_return(
X: DATAFRAME | None,
y: SERIES | None,
-) -> DATAFRAME | SERIES | tuple[DATAFRAME, SERIES]:
+) -> DATAFRAME | SERIES | tuple[DATAFRAME, PANDAS]:
"""Return one or two arguments depending on which is None.
This utility is used to make methods return only the provided
@@ -1326,7 +1327,7 @@ def variable_return(
X: dataframe or None
Feature set.
- y: series or None
+ y: series, dataframe or None
Target column.
Returns
@@ -1666,7 +1667,10 @@ def to_pyarrow(column: SERIES, inverse: bool = False) -> str:
"""
if not inverse and not column.dtype.name.endswith("[pyarrow]"):
- return f"{column.dtype.name}[pyarrow]"
+ if column.dtype.name == "object":
+ return "string[pyarrow]" # pyarrow doesn't support object
+ else:
+ return f"{column.dtype.name}[pyarrow]"
elif inverse and column.dtype.name.endswith("[pyarrow]"):
return column.dtype.name[:-9]
@@ -2092,7 +2096,12 @@ def get_feature_importance(
return np.abs(data.flatten())
-def export_pipeline(pipeline: pd.Series, model: MODEL | None, memory, verbose) -> Any:
+def export_pipeline(
+ pipeline: pd.Series,
+ model: MODEL | None = None,
+ memory: BOOL | str | Memory | None = None,
+ verbose: INT | None = None,
+) -> Any:
"""Export a pipeline to a sklearn-like object.
Optionally, you can add a model as final estimator.
@@ -2516,7 +2525,7 @@ def fit_transform_one(
y: TARGET | None = None,
message: str | None = None,
**fit_params,
-) -> tuple[DATAFRAME | None, SERIES | None]:
+) -> tuple[DATAFRAME | None, SERIES | None, TRANSFORMER]:
"""Fit and transform the data using one estimator.
Parameters
@@ -2565,10 +2574,10 @@ def fit_transform_one(
def custom_transform(
transformer: TRANSFORMER,
branch: BRANCH,
- data: tuple[DATAFRAME, SERIES] | None = None,
+ data: tuple[DATAFRAME, PANDAS] | None = None,
verbose: int | None = None,
method: str = "transform",
-) -> tuple[DATAFRAME, SERIES]:
+) -> tuple[DATAFRAME, PANDAS]:
"""Applies a transformer on a branch.
This function is generic and should work for all
@@ -2600,8 +2609,8 @@ def custom_transform(
dataframe
Feature set.
- series
- Target column.
+ series or dataframe
+ Target column(s).
"""
# Select provided data or from the branch
diff --git a/docs/404.html b/docs/404.html
index 3d4c3e8b2..5ea3a762c 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -1147,7 +1147,7 @@
- DirectRegressor
+ DirectForecaster
@@ -1189,7 +1189,7 @@
- SuccessiveHalvingRegressor
+ SuccessiveHalvingForecaster
@@ -1231,7 +1231,7 @@
- TrainSizingRegressor
+ TrainSizingForecaster
diff --git a/docs/API/ATOM/atomclassifier/index.html b/docs/API/ATOM/atomclassifier/index.html
index 6220e7e60..8806c1621 100644
--- a/docs/API/ATOM/atomclassifier/index.html
+++ b/docs/API/ATOM/atomclassifier/index.html
@@ -1288,7 +1288,7 @@
- DirectRegressor
+ DirectForecaster
@@ -1330,7 +1330,7 @@
- SuccessiveHalvingRegressor
+ SuccessiveHalvingForecaster
@@ -1372,7 +1372,7 @@
- TrainSizingRegressor
+ TrainSizingForecaster
@@ -3392,7 +3392,7 @@
ATOMClassifier
-
class atom.api.
ATOMClassifier(*arrays, y=-1, index=False, shuffle=True, stratify=True, n_rows=1, test_size=0.2, holdout_size=None, n_jobs=1, device="cpu", engine=None, backend="loky", verbose=0, warnings=False, logger=None, experiment=None, random_state=None)
[source] Main class for classification tasks.
+
class atom.api.
ATOMClassifier(*arrays, y=-1, index=False, shuffle=True, stratify=True, n_rows=1, test_size=0.2, holdout_size=None, n_jobs=1, device="cpu", engine={'data': 'numpy', 'estimator': 'sklearn'}, backend="loky", verbose=0, warnings=False, logger=None, experiment=None, random_state=None)
[source] Main class for classification tasks.
Apply all data transformations and model management provided by
the package on a given dataset. Note that, contrary to sklearn's
API, the instance contains the dataset on which to perform the
@@ -3427,7 +3427,6 @@
ATOMClassifier
y: int, str, dict, sequence or dataframe, default=-1
Target column corresponding to X.
-- If None: y is ignored.
- If int: Position of the target column in X.
- If str: Name of the target column in X.
- If sequence: Target array with shape=(n_samples,) or
@@ -3496,17 +3495,16 @@
ATOMClassifier
follows the SYCL_DEVICE_FILTER filter selector, e.g.
device="gpu"
to use the GPU. Read more in the
user guide.
-
engine: dict or None, default=None
engine: dict, default={"data": "numpy", "estimator": "sklearn"}
Execution engine to use for
data and
estimators. The value should be a
dictionary with keys
data
and/or
estimator
, with their
-corresponding choice as values. If None, the default options
-are selected. Choose from:
+corresponding choice as values. Choose from:
-
"data":
-- "numpy" (default)
+- "numpy"
- "pyarrow"
- "modin"
@@ -3514,7 +3512,7 @@ ATOMClassifier
-
"estimator":
-- "sklearn" (default)
+- "sklearn"
- "sklearnex"
- "cuml"
@@ -3611,15 +3609,15 @@ Data attributes
visualize the pipeline, use the plot_pipeline method.
mapping: dict
Encoded values and their respective mapped values.
The column name is the key to its mapping dictionary. Only for
columns mapped to a single column (e.g. Ordinal, Leave-one-out,
-etc...).
dataset: dataframe
Complete data set.
train: dataframe
Training set.
test: dataframe
Test set.
X: dataframe
Feature set.
y: series | dataframe
Target column(s).
X_train: dataframe
Features of the training set.
y_train: series | dataframe
Target column(s) of the training set.
X_test: dataframe
Features of the test set.
y_test: series | dataframe
Target column(s) of the test set.
shape: tuple[int, int]
Shape of the dataset (n_rows, n_columns).
columns: series
Name of all the columns.
n_columns: int
Number of columns.
features: series
Name of the features.
n_features: int
Number of features.
target: str | list[str]
Name of the target column(s).
scaled: bool
Whether the feature set is scaled.
+etc...).
dataset: dataframe
Complete data set.
train: dataframe
Training set.
test: dataframe
Test set.
X: dataframe
Feature set.
y: series | dataframe
Target column(s).
X_train: dataframe
Features of the training set.
y_train: series | dataframe
Target column(s) of the training set.
X_test: dataframe
Features of the test set.
y_test: series | dataframe
Target column(s) of the test set.
shape: tuple[int, int]
Shape of the dataset (n_rows, n_columns).
columns: index
Name of all the columns.
n_columns: int
Number of columns.
features: index
Name of the features.
n_features: int
Number of features.
target: str | list[str]
Name of the target column(s).
scaled: bool
Whether the feature set is scaled.
A data set is considered scaled when it has mean=0 and std=1,
or when there is a scaler in the pipeline. Binary columns (only
-0s and 1s) are excluded from the calculation.
duplicates: series
Number of duplicate rows in the dataset.
missing: list
Values that are considered "missing".
+0s and 1s) are excluded from the calculation.
duplicates: int
Number of duplicate rows in the dataset.
missing: list
Values that are considered "missing".
These values are used by the clean and
impute methods. Default values are: None, NaN,
NaT, +inf, -inf, "", "?", "None", "NA", "nan", "NaN", "NaT",
"inf". Note that None, NaN, +inf and -inf are always considered
-missing since they are incompatible with sklearn estimators.
nans: series | None
Columns with the number of missing values in them.
n_nans: int | None
Number of samples containing missing values.
numerical: series
Names of the numerical features in the dataset.
n_numerical: int
Number of numerical features in the dataset.
categorical: series
Names of the categorical features in the dataset.
n_categorical: int
Number of categorical features in the dataset.
outliers: series | None
Columns in training set with amount of outlier values.
n_outliers: int | None
Number of samples in the training set containing outliers.
classes: pd.DataFrame | None
Distribution of target classes per data set.
n_classes: int | series | None
Number of classes in the target column(s).
+missing since they are incompatible with sklearn estimators.
nans: series | None
Columns with the number of missing values in them.
n_nans: int | None
Number of samples containing missing values.
numerical: index
Names of the numerical features in the dataset.
n_numerical: int
Number of numerical features in the dataset.
categorical: index
Names of the categorical features in the dataset.
n_categorical: int
Number of categorical features in the dataset.
outliers: pd.Series | None
Columns in training set with amount of outlier values.
n_outliers: int | None
Number of samples in the training set containing outliers.
classes: pd.DataFrame | None
Distribution of target classes per data set.
n_classes: int | series | None
Number of classes in the target column(s).
Utility attributes
@@ -3669,7 +3667,7 @@ Plot attributes
The plot attributes are used to customize the plot's aesthetics. Read
more in the user guide.
-Attributes | palette: str | SEQUENCE
Color palette.
+ Attributes | palette: str | sequence
Color palette.
Specify one of plotly's built-in palettes or create
a custom one, e.g. atom.palette = ["red", "green", "blue"] . title_fontsize: int
Fontsize for the plot's title. label_fontsize: int
Fontsize for the labels, legend and hover information. tick_fontsize: int
Fontsize for the ticks along the plot's axes. line_width: int
Width of the line plots. marker_size: int
Size of the markers. |
|
@@ -3681,7 +3679,7 @@ Utility methods
add | Add a transformer to the pipeline. |
apply | Apply a function to the dataset. |
automl | Search for an optimized pipeline in an automated fashion. |
available_models | Give an overview of the available predefined models. |
canvas | Create a figure with multiple plots. |
clear | Reset attributes and clear cache from all models. |
delete | Delete models. |
distribution | Get statistics on column distributions. |
eda | Create an Exploratory Data Analysis report. |
evaluate | Get all models' scores for the provided metrics. |
export_pipeline | Export the pipeline to a sklearn-like object. |
get_class_weight | Return class weights for a balanced data set. |
get_sample_weight | Return sample weights for a balanced data set. |
inverse_transform | Inversely transform new data through the pipeline. |
load | Loads an atom instance from a pickle file. |
log | Print message and save to log file. |
merge | Merge another instance of the same class into this one. |
update_layout | Update the properties of the plot's layout. |
update_traces | Update the properties of the plot's traces. |
reset | Reset the instance to it's initial state. |
reset_aesthetics | Reset the plot aesthetics to their default values. |
save | Save the instance to a pickle file. |
save_data | Save the data in the current branch to a .csv file. |
shrink | Converts the columns to the smallest possible matching dtype. |
stacking | Add a Stacking model to the pipeline. |
stats | Display basic information about the dataset. |
status | Get an overview of the branches and models. |
transform | Transform new data through the pipeline. |
voting | Add a Voting model to the pipeline. |
-
method add(transformer, columns=None, train_only=False, **fit_params)
[source] Add a transformer to the pipeline.
+
method add(transformer, columns=None, train_only=False, **fit_params)
[source] Add a transformer to the pipeline.
If the transformer is not fitted, it is fitted on the complete
training set. Afterwards, the data set is transformed and the
estimator is added to atom's pipeline. If the estimator is
@@ -3744,10 +3742,9 @@
Utility methods
-
method apply(func, inverse_func=None, kw_args=None, inv_kw_args=None, **kwargs)
[source] Apply a function to the dataset.
-The function should have signature func(dataset, **kw_args) ->
-dataset
. This method is useful for stateless transformations
-such as taking the log, doing custom scaling, etc...
+
method apply(func, inverse_func=None, kw_args=None, inv_kw_args=None, **kwargs)
[source] Apply a function to the dataset.
+This method is useful for stateless transformations such as
+taking the log, doing custom scaling, etc...
Note
This approach is preferred over changing the dataset directly
@@ -3760,7 +3757,8 @@
Utility methods
Parameters | func: callable
-Function to apply.
+Function to apply with signature func(dataset, **kw_args) ->
+dataset .
inverse_func: callable or None, default=None
Inverse function of func . If None, the inverse_transform
method returns the input unchanged.
@@ -3771,7 +3769,7 @@ Utility methods
|
-
Search for an optimized pipeline in an automated fashion.
+
Search for an optimized pipeline in an automated fashion.
Automated machine learning (AutoML) automates the selection,
composition and parameterization of machine learning pipelines.
Automating the machine learning often provides faster, more
@@ -3793,7 +3791,7 @@
Utility methods
-
Give an overview of the available predefined models.
+
Give an overview of the available predefined models.
Returns | pd.DataFrame
Information about the available predefined models. Columns
@@ -3815,7 +3813,7 @@ Utility methods
|
-
method canvas(rows=1, cols=2, horizontal_spacing=0.05, vertical_spacing=0.07, title=None, legend="out", figsize=None, filename=None, display=True)
[source] Create a figure with multiple plots.
+
method canvas(rows=1, cols=2, horizontal_spacing=0.05, vertical_spacing=0.07, title=None, legend="out", figsize=None, filename=None, display=True)
[source] Create a figure with multiple plots.
This @contextmanager
allows you to draw many plots in one
figure. The default option is to add two plots side by side.
See the user guide for an example.
@@ -3860,7 +3858,7 @@ Utility methods
-
Reset attributes and clear cache from all models.
+
Reset attributes and clear cache from all models.
Reset certain model attributes to their initial state, deleting
potentially large data arrays. Use this method to free some
memory before saving the instance. The affected
@@ -3875,7 +3873,7 @@
Utility methods
Cached holdout data sets
-
Delete models.
+
Delete models.
If all models are removed, the metric is reset. Use this method
to drop unwanted models from the pipeline or to free some memory
before saving. Deleted models are not removed from
@@ -3886,7 +3884,7 @@
Utility methods
-
method distribution(distributions=None, columns=None)
[source] Get statistics on column distributions.
+
method distribution(distributions=None, columns=None)
[source] Get statistics on column distributions.
Compute the Kolmogorov-Smirnov test for various
distributions against columns in the dataset. Only for numerical
columns. Missing values are ignored.
@@ -3916,7 +3914,7 @@ Utility methods
-
method eda(dataset="dataset", n_rows=None, filename=None, **kwargs)
[source] Create an Exploratory Data Analysis report.
+
method eda(dataset="dataset", n_rows=None, filename=None, **kwargs)
[source] Create an Exploratory Data Analysis report.
ATOM uses the ydata-profiling package for the EDA.
The report is rendered directly in the notebook. The created
ProfileReport instance can be accessed through the report
@@ -3940,7 +3938,7 @@
Utility methods
-
method evaluate(metric=None, dataset="test", threshold=0.5, sample_weight=None)
[source] Get all models' scores for the provided metrics.
+
method evaluate(metric=None, dataset="test", threshold=0.5, sample_weight=None)
[source] Get all models' scores for the provided metrics.
Parameters | metric: str, func, scorer, sequence or None, default=None
Metric to calculate. If None, it returns an overview of
@@ -3967,7 +3965,7 @@ Utility methods
|
-
method export_pipeline(model=None, memory=None, verbose=None)
[source] Export the pipeline to a sklearn-like object.
+
method export_pipeline(model=None, memory=None, verbose=None)
[source] Export the pipeline to a sklearn-like object.
Optionally, you can add a model as final estimator. The
returned pipeline is already fitted on the training set.
@@ -4006,7 +4004,7 @@
Utility methods
-
method get_class_weight(dataset="train")
[source] Return class weights for a balanced data set.
+
method get_class_weight(dataset="train")
[source] Return class weights for a balanced data set.
Statistically, the class weights re-balance the data set so
that the sampled data set represents the target population
as closely as possible. The returned weights are inversely
@@ -4021,7 +4019,7 @@
Utility methods
-
method get_sample_weight(dataset="train")
[source] Return sample weights for a balanced data set.
+
method get_sample_weight(dataset="train")
[source] Return sample weights for a balanced data set.
The returned weights are inversely proportional to the class
frequencies in the selected data set. For multioutput tasks,
the weights of each column of y
will be multiplied.
@@ -4034,7 +4032,7 @@ Utility methods
-
method inverse_transform(X=None, y=None, verbose=None)
[source] Inversely transform new data through the pipeline.
+
method inverse_transform(X=None, y=None, verbose=None)
[source] Inversely transform new data through the pipeline.
Transformers that are only applied on the training set are
skipped. The rest should all implement a inverse_transform
method. If only X
or only y
is provided, it ignores
@@ -4064,7 +4062,7 @@
Utility methods
-
function atom.atom.
load(filename, data=None, transform_data=True, verbose=None)
[source] Loads an atom instance from a pickle file.
+
function atom.atom.
load(filename, data=None, transform_data=True, verbose=None)
[source] Loads an atom instance from a pickle file.
If the instance was saved using save_data=False
,
it's possible to load new data into it and apply all data
transformations.
@@ -4114,7 +4112,7 @@ Utility methods
-
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
+
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
Parameters | msg: int, float or str
Message to save to the logger and print to stdout.
@@ -4126,7 +4124,7 @@ Utility methods
|
-
Merge another instance of the same class into this one.
+
Merge another instance of the same class into this one.
Branches, models, metrics and attributes of the other instance
are merged into this one. If there are branches and/or models
with the same name, they are merged adding the suffix
@@ -4144,7 +4142,7 @@
Utility methods
-
Update the properties of the plot's layout.
+
Update the properties of the plot's layout.
Recursively update the structure of the original layout with
the values in the arguments.
@@ -4153,7 +4151,7 @@ Utility methods
-
Update the properties of the plot's traces.
+
Update the properties of the plot's traces.
Recursively update the structure of the original traces with
the values in the arguments.
@@ -4162,13 +4160,13 @@ Utility methods
-
Reset the instance to it's initial state.
+
Reset the instance to it's initial state.
Deletes all branches and models. The dataset is also reset
to its form after initialization.
-
Reset the plot aesthetics to their default values.
+
Reset the plot aesthetics to their default values.
-
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
+
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
Parameters | filename: str, default="auto"
Name of the file. Use "auto" for automatic naming.
@@ -4179,7 +4177,7 @@ Utility methods
|
-
method save_data(filename="auto", dataset="dataset", **kwargs)
[source] Save the data in the current branch to a .csv
file.
+
method save_data(filename="auto", dataset="dataset", **kwargs)
[source] Save the data in the current branch to a .csv
file.
Parameters | filename: str, default="auto"
Name of the file. Use "auto" for automatic naming.
@@ -4190,7 +4188,7 @@ Utility methods
|
-
method shrink(int2bool=False, int2uint=False, str2cat=False, dense2sparse=False, columns=None)
[source] Converts the columns to the smallest possible matching dtype.
+
method shrink(int2bool=False, int2uint=False, str2cat=False, dense2sparse=False, columns=None)
[source] Converts the columns to the smallest possible matching dtype.
Parameters | int2bool: bool, default=False
Whether to convert int columns to bool type. Only if the
@@ -4211,7 +4209,7 @@ Utility methods
|
-
method stacking(models=None, name="Stack", **kwargs)
[source] Add a Stacking model to the pipeline.
+
method stacking(models=None, name="Stack", **kwargs)
[source] Add a Stacking model to the pipeline.
Warning
Combining models trained on different branches into one
@@ -4231,18 +4229,18 @@
Utility methods
-
Display basic information about the dataset.
+
Display basic information about the dataset.
Parameters | _vb: int, default=-2
Internal parameter to always print if called by user.
|
-
Get an overview of the branches and models.
+
Get an overview of the branches and models.
This method prints the same information as the __repr__ and
also saves it to the logger.
-
method transform(X=None, y=None, verbose=None)
[source] Transform new data through the pipeline.
+
method transform(X=None, y=None, verbose=None)
[source] Transform new data through the pipeline.
Transformers that are only applied on the training set are
skipped. If only X
or only y
is provided, it ignores
transformers that require the other parameter. This can be
@@ -4272,7 +4270,7 @@
Utility methods
-
method voting(models=None, name="Vote", **kwargs)
[source] Add a Voting model to the pipeline.
+
method voting(models=None, name="Vote", **kwargs)
[source] Add a Voting model to the pipeline.
Warning
Combining models trained on different branches into one
@@ -4305,7 +4303,7 @@
Data cleaning
balance | Balance the number of rows per class in the target column. |
clean | Applies standard data cleaning steps on the dataset. |
discretize | Bin continuous data into intervals. |
encode | Perform encoding of categorical features. |
impute | Handle missing values in the dataset. |
normalize | Transform the data to follow a Normal/Gaussian distribution. |
prune | Prune outliers from the training set. |
scale | Scale the data. |
-
method balance(strategy="adasyn", **kwargs)
[source] Balance the number of rows per class in the target column.
+
method balance(strategy="adasyn", **kwargs)
[source] Balance the number of rows per class in the target column.
When oversampling, the newly created samples have an increasing
integer index for numerical indices, and an index of the form
[estimator]_N for non-numerical indices, where N stands for the
@@ -4326,7 +4324,7 @@
Data cleaning
of the target class distribution per data set.
-
method clean(convert_dtypes=True, drop_dtypes=None, drop_chars=None, strip_categorical=True, drop_duplicates=False, drop_missing_target=True, encode_target=True, **kwargs)
[source] Applies standard data cleaning steps on the dataset.
+
method clean(convert_dtypes=True, drop_dtypes=None, drop_chars=None, strip_categorical=True, drop_duplicates=False, drop_missing_target=True, encode_target=True, **kwargs)
[source] Applies standard data cleaning steps on the dataset.
Use the parameters to choose which transformations to perform.
The available steps are:
@@ -4340,7 +4338,7 @@ Data cleaning
See the Cleaner class for a description of the parameters.
-
method discretize(strategy="quantile", bins=5, labels=None, **kwargs)
[source] Bin continuous data into intervals.
+
method discretize(strategy="quantile", bins=5, labels=None, **kwargs)
[source] Bin continuous data into intervals.
For each feature, the bin edges are computed during fit
and, together with the number of bins, they will define the
intervals. Ignores numerical columns.
@@ -4351,7 +4349,7 @@ Data cleaning
distribution and decide on the bins.
-
method encode(strategy="Target", max_onehot=10, ordinal=None, infrequent_to_value=None, value="rare", **kwargs)
[source] Perform encoding of categorical features.
+
method encode(strategy="Target", max_onehot=10, ordinal=None, infrequent_to_value=None, value="rare", **kwargs)
[source] Perform encoding of categorical features.
The encoding type depends on the number of classes in the
column:
@@ -4376,7 +4374,7 @@ Data cleaning
list of the categorical features in the dataset.
-
method impute(strat_num="drop", strat_cat="drop", max_nan_rows=None, max_nan_cols=None, **kwargs)
[source] Handle missing values in the dataset.
+
method impute(strat_num="drop", strat_cat="drop", max_nan_rows=None, max_nan_cols=None, **kwargs)
[source] Handle missing values in the dataset.
Impute or remove missing values according to the selected
strategy. Also removes rows and columns with too many missing
values. Use the missing
attribute to customize what are
@@ -4388,7 +4386,7 @@
Data cleaning
missing values per column.
-
method normalize(strategy="yeojohnson", **kwargs)
[source] Transform the data to follow a Normal/Gaussian distribution.
+
method normalize(strategy="yeojohnson", **kwargs)
[source] Transform the data to follow a Normal/Gaussian distribution.
This transformation is useful for modeling issues related
to heteroscedasticity (non-constant variance), or other
situations where normality is desired. Missing values are
@@ -4401,7 +4399,7 @@
Data cleaning
distribution.
-
method prune(strategy="zscore", method="drop", max_sigma=3, include_target=False, **kwargs)
[source] Prune outliers from the training set.
+
method prune(strategy="zscore", method="drop", max_sigma=3, include_target=False, **kwargs)
[source] Prune outliers from the training set.
Replace or remove outliers. The definition of outlier depends
on the selected strategy and can greatly differ from one
another. Ignores categorical columns.
@@ -4418,7 +4416,7 @@ Data cleaning
number of outliers per column.
-
method scale(strategy="standard", include_binary=False, **kwargs)
[source] Scale the data.
+
method scale(strategy="standard", include_binary=False, **kwargs)
[source] Scale the data.
Apply one of sklearn's scalers. Categorical columns are ignored.
See the Scaler class for a description of the parameters.
@@ -4437,7 +4435,7 @@
NLP
textclean | Applies standard text cleaning to the corpus. |
textnormalize | Normalize the corpus. |
tokenize | Tokenize the corpus. |
vectorize | Vectorize the corpus. |
-
method textclean(decode=True, lower_case=True, drop_email=True, regex_email=None, drop_url=True, regex_url=None, drop_html=True, regex_html=None, drop_emoji=True, regex_emoji=None, drop_number=True, regex_number=None, drop_punctuation=True, **kwargs)
[source] Applies standard text cleaning to the corpus.
+
method textclean(decode=True, lower_case=True, drop_email=True, regex_email=None, drop_url=True, regex_url=None, drop_html=True, regex_html=None, drop_emoji=True, regex_emoji=None, drop_number=True, regex_number=None, drop_punctuation=True, **kwargs)
[source] Applies standard text cleaning to the corpus.
Transformations include normalizing characters and dropping
noise from the text (emails, HTML tags, URLs, etc...). The
transformations are applied on the column named corpus
, in
@@ -4446,7 +4444,7 @@
NLP
See the TextCleaner class for a description of the
parameters.
-
method textnormalize(stopwords=True, custom_stopwords=None, stem=False, lemmatize=True, **kwargs)
[source] Normalize the corpus.
+
method textnormalize(stopwords=True, custom_stopwords=None, stem=False, lemmatize=True, **kwargs)
[source] Normalize the corpus.
Convert words to a more uniform standard. The transformations
are applied on the column named corpus
, in the same order the
parameters are presented. If there is no column with that name,
@@ -4455,7 +4453,7 @@
NLP
See the TextNormalizer class for a description of the
parameters.
-
method tokenize(bigram_freq=None, trigram_freq=None, quadgram_freq=None, **kwargs)
[source] Tokenize the corpus.
+
method tokenize(bigram_freq=None, trigram_freq=None, quadgram_freq=None, **kwargs)
[source] Tokenize the corpus.
Convert documents into sequences of words. Additionally,
create n-grams (represented by words united with underscores,
e.g. "New_York") based on their frequency in the corpus. The
@@ -4463,7 +4461,7 @@
NLP
there is no column with that name, an exception is raised.
See the Tokenizer class for a description of the parameters.
-
method vectorize(strategy="bow", return_sparse=True, **kwargs)
[source] Vectorize the corpus.
+
method vectorize(strategy="bow", return_sparse=True, **kwargs)
[source] Vectorize the corpus.
Transform the corpus into meaningful vectors of numbers. The
transformation is applied on the column named corpus
. If
there is no column with that name, an exception is raised.
@@ -4484,7 +4482,7 @@
Feature engineering
feature_extraction | Extract features from datetime columns. |
feature_generation | Generate new features. |
feature_grouping | Extract statistics from similar features. |
feature_selection | Reduce the number of features in the data. |
-
method feature_extraction(features=['day', 'month', 'year'], fmt=None, encoding_type="ordinal", drop_columns=True, **kwargs)
[source] Extract features from datetime columns.
+
method feature_extraction(features=['day', 'month', 'year'], fmt=None, encoding_type="ordinal", drop_columns=True, **kwargs)
[source] Extract features from datetime columns.
Create new features extracting datetime elements (day, month,
year, etc...) from the provided columns. Columns of dtype
datetime64
are used as is. Categorical columns that can be
@@ -4493,13 +4491,13 @@
Feature engineering
See the FeatureExtractor class for a description of the
parameters.
-
method feature_generation(strategy="dfs", n_features=None, operators=None, **kwargs)
[source] Generate new features.
+
method feature_generation(strategy="dfs", n_features=None, operators=None, **kwargs)
[source] Generate new features.
Create new combinations of existing features to capture the
non-linear relations between the original features.
See the FeatureGenerator class for a description of the
parameters.
-
method feature_grouping(group, operators=None, drop_columns=True, **kwargs)
[source] Extract statistics from similar features.
+
method feature_grouping(group, operators=None, drop_columns=True, **kwargs)
[source] Extract statistics from similar features.
Replace groups of features with related characteristics with new
features that summarize statistical properties of te group. The
statistical operators are calculated over every row of the group.
@@ -4508,7 +4506,7 @@
Feature engineering
See the FeatureGrouper class for a description of the
parameters.
-
method feature_selection(strategy=None, solver=None, n_features=None, min_repeated=2, max_repeated=1.0, max_correlation=1.0, **kwargs)
[source] Reduce the number of features in the data.
+
method feature_selection(strategy=None, solver=None, n_features=None, min_repeated=2, max_repeated=1.0, max_correlation=1.0, **kwargs)
[source] Reduce the number of features in the data.
Apply feature selection or dimensionality reduction, either to
improve the estimators' accuracy or to boost their performance
on very high-dimensional datasets. Additionally, remove
@@ -4536,7 +4534,7 @@
Training
run | Train and evaluate the models in a direct fashion. |
successive_halving | Fit the models in a successive halving fashion. |
train_sizing | Train and evaluate the models in a train sizing fashion. |
-
method run(models=None, metric=None, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Train and evaluate the models in a direct fashion.
+
method run(models=None, metric=None, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Train and evaluate the models in a direct fashion.
Contrary to successive_halving and
train_sizing, the direct approach only
iterates once over the models, using the full dataset.
@@ -4553,7 +4551,7 @@
Training
See the DirectClassifier or DirectRegressor class for a
description of the parameters.
-
method successive_halving(models, metric=None, skip_runs=0, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Fit the models in a successive halving fashion.
+
method successive_halving(models, metric=None, skip_runs=0, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Fit the models in a successive halving fashion.
The successive halving technique is a bandit-based algorithm
that fits N models to 1/N of the data. The best half are
selected to go to the next iteration where the process is
@@ -4576,7 +4574,7 @@
Training
See the SuccessiveHalvingClassifier or SuccessiveHalvingRegressor
class for a description of the parameters.
-
method train_sizing(models, metric=None, train_sizes=5, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Train and evaluate the models in a train sizing fashion.
+
method train_sizing(models, metric=None, train_sizes=5, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Train and evaluate the models in a train sizing fashion.
When training models, there is usually a trade-off between
model performance and computation time, that is regulated by
the number of samples in the training set. This method can be
diff --git a/docs/API/ATOM/atomforecaster/index.html b/docs/API/ATOM/atomforecaster/index.html
index 18f2db516..3d4089054 100644
--- a/docs/API/ATOM/atomforecaster/index.html
+++ b/docs/API/ATOM/atomforecaster/index.html
@@ -1288,7 +1288,7 @@
-
- DirectRegressor
+ DirectForecaster
@@ -1330,7 +1330,7 @@
-
- SuccessiveHalvingRegressor
+ SuccessiveHalvingForecaster
@@ -1372,7 +1372,7 @@
-
- TrainSizingRegressor
+ TrainSizingForecaster
@@ -3392,7 +3392,7 @@
ATOMForecaster
-
class atom.api.
ATOMForecaster(*arrays, y=-1, n_rows=1, test_size=0.2, holdout_size=None, n_jobs=1, device="cpu", engine=None, backend="loky", verbose=0, warnings=False, logger=None, experiment=None, random_state=None)
[source] Main class for forecasting tasks.
+
class atom.api.
ATOMForecaster(*arrays, y=-1, n_rows=1, test_size=0.2, holdout_size=None, n_jobs=1, device="cpu", engine={'data': 'numpy', 'estimator': 'sklearn'}, backend="loky", verbose=0, warnings=False, logger=None, experiment=None, random_state=None)
[source] Main class for forecasting tasks.
Apply all data transformations and model management provided by
the package on a given dataset. Note that, contrary to sklearn's
API, the instance contains the dataset on which to perform the
@@ -3475,17 +3475,16 @@
ATOMForecaster
follows the
SYCL_DEVICE_FILTER filter selector, e.g.
device="gpu"
to use the GPU. Read more in the
user guide.
-
engine: dict or None, default=None
engine: dict, default={"data": "numpy", "estimator": "sklearn"}
Execution engine to use for
data and
estimators. The value should be a
dictionary with keys
data
and/or
estimator
, with their
-corresponding choice as values. If None, the default options
-are selected. Choose from:
+corresponding choice as values. Choose from:
-
"data":
-- "numpy" (default)
+- "numpy"
- "pyarrow"
- "modin"
@@ -3493,7 +3492,7 @@ ATOMForecaster
-
"estimator":
-- "sklearn" (default)
+- "sklearn"
- "sklearnex"
- "cuml"
@@ -3586,15 +3585,15 @@ Data attributes
visualize the pipeline, use the plot_pipeline method.
mapping: dict
Encoded values and their respective mapped values.
The column name is the key to its mapping dictionary. Only for
columns mapped to a single column (e.g. Ordinal, Leave-one-out,
-etc...).
dataset: dataframe
Complete data set.
train: dataframe
Training set.
test: dataframe
Test set.
X: dataframe
Feature set.
y: series | dataframe
Target column(s).
X_train: dataframe
Features of the training set.
y_train: series | dataframe
Target column(s) of the training set.
X_test: dataframe
Features of the test set.
y_test: series | dataframe
Target column(s) of the test set.
shape: tuple[int, int]
Shape of the dataset (n_rows, n_columns).
columns: series
Name of all the columns.
n_columns: int
Number of columns.
features: series
Name of the features.
n_features: int
Number of features.
target: str | list[str]
Name of the target column(s).
scaled: bool
Whether the feature set is scaled.
+etc...).
dataset: dataframe
Complete data set.
train: dataframe
Training set.
test: dataframe
Test set.
X: dataframe
Feature set.
y: series | dataframe
Target column(s).
X_train: dataframe
Features of the training set.
y_train: series | dataframe
Target column(s) of the training set.
X_test: dataframe
Features of the test set.
y_test: series | dataframe
Target column(s) of the test set.
shape: tuple[int, int]
Shape of the dataset (n_rows, n_columns).
columns: index
Name of all the columns.
n_columns: int
Number of columns.
features: index
Name of the features.
n_features: int
Number of features.
target: str | list[str]
Name of the target column(s).
scaled: bool
Whether the feature set is scaled.
A data set is considered scaled when it has mean=0 and std=1,
or when there is a scaler in the pipeline. Binary columns (only
-0s and 1s) are excluded from the calculation.
duplicates: series
Number of duplicate rows in the dataset.
missing: list
Values that are considered "missing".
+0s and 1s) are excluded from the calculation.
duplicates: int
Number of duplicate rows in the dataset.
missing: list
Values that are considered "missing".
These values are used by the clean and
impute methods. Default values are: None, NaN,
NaT, +inf, -inf, "", "?", "None", "NA", "nan", "NaN", "NaT",
"inf". Note that None, NaN, +inf and -inf are always considered
-missing since they are incompatible with sklearn estimators.
nans: series | None
Columns with the number of missing values in them.
n_nans: int | None
Number of samples containing missing values.
numerical: series
Names of the numerical features in the dataset.
n_numerical: int
Number of numerical features in the dataset.
categorical: series
Names of the categorical features in the dataset.
n_categorical: int
Number of categorical features in the dataset.
outliers: series | None
Columns in training set with amount of outlier values.
n_outliers: int | None
Number of samples in the training set containing outliers.
+missing since they are incompatible with sklearn estimators.
nans: series | None
Columns with the number of missing values in them.
n_nans: int | None
Number of samples containing missing values.
numerical: index
Names of the numerical features in the dataset.
n_numerical: int
Number of numerical features in the dataset.
categorical: index
Names of the categorical features in the dataset.
n_categorical: int
Number of categorical features in the dataset.
outliers: pd.Series | None
Columns in training set with amount of outlier values.
n_outliers: int | None
Number of samples in the training set containing outliers.
Utility attributes
@@ -3644,7 +3643,7 @@ Plot attributes
The plot attributes are used to customize the plot's aesthetics. Read
more in the user guide.
-Attributes | palette: str | SEQUENCE
Color palette.
+ Attributes | palette: str | sequence
Color palette.
Specify one of plotly's built-in palettes or create
a custom one, e.g. atom.palette = ["red", "green", "blue"] . title_fontsize: int
Fontsize for the plot's title. label_fontsize: int
Fontsize for the labels, legend and hover information. tick_fontsize: int
Fontsize for the ticks along the plot's axes. line_width: int
Width of the line plots. marker_size: int
Size of the markers. |
|
@@ -3656,7 +3655,7 @@ Utility methods
add | Add a transformer to the pipeline. |
apply | Apply a function to the dataset. |
automl | Search for an optimized pipeline in an automated fashion. |
available_models | Give an overview of the available predefined models. |
canvas | Create a figure with multiple plots. |
clear | Reset attributes and clear cache from all models. |
delete | Delete models. |
distribution | Get statistics on column distributions. |
eda | Create an Exploratory Data Analysis report. |
evaluate | Get all models' scores for the provided metrics. |
export_pipeline | Export the pipeline to a sklearn-like object. |
get_class_weight | Return class weights for a balanced data set. |
get_sample_weight | Return sample weights for a balanced data set. |
inverse_transform | Inversely transform new data through the pipeline. |
load | Loads an atom instance from a pickle file. |
log | Print message and save to log file. |
merge | Merge another instance of the same class into this one. |
update_layout | Update the properties of the plot's layout. |
update_traces | Update the properties of the plot's traces. |
reset | Reset the instance to its initial state. |
reset_aesthetics | Reset the plot aesthetics to their default values. |
save | Save the instance to a pickle file. |
save_data | Save the data in the current branch to a .csv file. |
shrink | Converts the columns to the smallest possible matching dtype. |
stacking | Add a Stacking model to the pipeline. |
stats | Display basic information about the dataset. |
status | Get an overview of the branches and models. |
transform | Transform new data through the pipeline. |
voting | Add a Voting model to the pipeline. |
-
method add(transformer, columns=None, train_only=False, **fit_params)
[source] Add a transformer to the pipeline.
+
method add(transformer, columns=None, train_only=False, **fit_params)
[source] Add a transformer to the pipeline.
If the transformer is not fitted, it is fitted on the complete
training set. Afterwards, the data set is transformed and the
estimator is added to atom's pipeline. If the estimator is
@@ -3719,10 +3718,9 @@
Utility methods
-
method apply(func, inverse_func=None, kw_args=None, inv_kw_args=None, **kwargs)
[source] Apply a function to the dataset.
-The function should have signature func(dataset, **kw_args) ->
-dataset
. This method is useful for stateless transformations
-such as taking the log, doing custom scaling, etc...
+
method apply(func, inverse_func=None, kw_args=None, inv_kw_args=None, **kwargs)
[source] Apply a function to the dataset.
+This method is useful for stateless transformations such as
+taking the log, doing custom scaling, etc...
Note
This approach is preferred over changing the dataset directly
@@ -3735,7 +3733,8 @@
Utility methods
Parameters | func: callable
-Function to apply.
+Function to apply with signature func(dataset, **kw_args) ->
+dataset .
inverse_func: callable or None, default=None
Inverse function of func . If None, the inverse_transform
method returns the input unchanged.
@@ -3746,7 +3745,7 @@ Utility methods
|
-
Search for an optimized pipeline in an automated fashion.
+
Search for an optimized pipeline in an automated fashion.
Automated machine learning (AutoML) automates the selection,
composition and parameterization of machine learning pipelines.
Automating the machine learning often provides faster, more
@@ -3768,7 +3767,7 @@
Utility methods
-
Give an overview of the available predefined models.
+
Give an overview of the available predefined models.
Returns | pd.DataFrame
Information about the available predefined models. Columns
@@ -3790,7 +3789,7 @@ Utility methods
|
-
method canvas(rows=1, cols=2, horizontal_spacing=0.05, vertical_spacing=0.07, title=None, legend="out", figsize=None, filename=None, display=True)
[source] Create a figure with multiple plots.
+
method canvas(rows=1, cols=2, horizontal_spacing=0.05, vertical_spacing=0.07, title=None, legend="out", figsize=None, filename=None, display=True)
[source] Create a figure with multiple plots.
This @contextmanager
allows you to draw many plots in one
figure. The default option is to add two plots side by side.
See the user guide for an example.
@@ -3835,7 +3834,7 @@ Utility methods
-
Reset attributes and clear cache from all models.
+
Reset attributes and clear cache from all models.
Reset certain model attributes to their initial state, deleting
potentially large data arrays. Use this method to free some
memory before saving the instance. The affected
@@ -3850,7 +3849,7 @@
Utility methods
- Cached holdout data sets
-
Delete models.
+
Delete models.
If all models are removed, the metric is reset. Use this method
to drop unwanted models from the pipeline or to free some memory
before saving. Deleted models are not removed from
@@ -3861,7 +3860,7 @@
Utility methods
-
method distribution(distributions=None, columns=None)
[source] Get statistics on column distributions.
+
method distribution(distributions=None, columns=None)
[source] Get statistics on column distributions.
Compute the Kolmogorov-Smirnov test for various
distributions against columns in the dataset. Only for numerical
columns. Missing values are ignored.
@@ -3891,7 +3890,7 @@ Utility methods
-
method eda(dataset="dataset", n_rows=None, filename=None, **kwargs)
[source] Create an Exploratory Data Analysis report.
+
method eda(dataset="dataset", n_rows=None, filename=None, **kwargs)
[source] Create an Exploratory Data Analysis report.
ATOM uses the ydata-profiling package for the EDA.
The report is rendered directly in the notebook. The created
ProfileReport instance can be accessed through the report
@@ -3915,7 +3914,7 @@
Utility methods
-
method evaluate(metric=None, dataset="test", threshold=0.5, sample_weight=None)
[source] Get all models' scores for the provided metrics.
+
method evaluate(metric=None, dataset="test", threshold=0.5, sample_weight=None)
[source] Get all models' scores for the provided metrics.
Parameters | metric: str, func, scorer, sequence or None, default=None
Metric to calculate. If None, it returns an overview of
@@ -3942,7 +3941,7 @@ Utility methods
|
-
method export_pipeline(model=None, memory=None, verbose=None)
[source] Export the pipeline to a sklearn-like object.
+
method export_pipeline(model=None, memory=None, verbose=None)
[source] Export the pipeline to a sklearn-like object.
Optionally, you can add a model as final estimator. The
returned pipeline is already fitted on the training set.
@@ -3981,7 +3980,7 @@
Utility methods
-
method get_class_weight(dataset="train")
[source] Return class weights for a balanced data set.
+
method get_class_weight(dataset="train")
[source] Return class weights for a balanced data set.
Statistically, the class weights re-balance the data set so
that the sampled data set represents the target population
as closely as possible. The returned weights are inversely
@@ -3996,7 +3995,7 @@
Utility methods
-
method get_sample_weight(dataset="train")
[source] Return sample weights for a balanced data set.
+
method get_sample_weight(dataset="train")
[source] Return sample weights for a balanced data set.
The returned weights are inversely proportional to the class
frequencies in the selected data set. For multioutput tasks,
the weights of each column of y
will be multiplied.
@@ -4009,7 +4008,7 @@ Utility methods
-
method inverse_transform(X=None, y=None, verbose=None)
[source] Inversely transform new data through the pipeline.
+
method inverse_transform(X=None, y=None, verbose=None)
[source] Inversely transform new data through the pipeline.
Transformers that are only applied on the training set are
skipped. The rest should all implement an inverse_transform
method. If only X
or only y
is provided, it ignores
@@ -4039,7 +4038,7 @@
Utility methods
-
function atom.atom.
load(filename, data=None, transform_data=True, verbose=None)
[source] Loads an atom instance from a pickle file.
+
function atom.atom.
load(filename, data=None, transform_data=True, verbose=None)
[source] Loads an atom instance from a pickle file.
If the instance was saved using save_data=False
,
it's possible to load new data into it and apply all data
transformations.
@@ -4089,7 +4088,7 @@ Utility methods
-
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
+
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
Parameters | msg: int, float or str
Message to save to the logger and print to stdout.
@@ -4101,7 +4100,7 @@ Utility methods
|
-
Merge another instance of the same class into this one.
+
Merge another instance of the same class into this one.
Branches, models, metrics and attributes of the other instance
are merged into this one. If there are branches and/or models
with the same name, they are merged adding the suffix
@@ -4119,7 +4118,7 @@
Utility methods
-
Update the properties of the plot's layout.
+
Update the properties of the plot's layout.
Recursively update the structure of the original layout with
the values in the arguments.
@@ -4128,7 +4127,7 @@ Utility methods
-
Update the properties of the plot's traces.
+
Update the properties of the plot's traces.
Recursively update the structure of the original traces with
the values in the arguments.
@@ -4137,13 +4136,13 @@ Utility methods
-
Reset the instance to its initial state.
+
Reset the instance to its initial state.
Deletes all branches and models. The dataset is also reset
to its form after initialization.
-
Reset the plot aesthetics to their default values.
+
Reset the plot aesthetics to their default values.
-
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
+
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
Parameters | filename: str, default="auto"
Name of the file. Use "auto" for automatic naming.
@@ -4154,7 +4153,7 @@ Utility methods
|
-
method save_data(filename="auto", dataset="dataset", **kwargs)
[source] Save the data in the current branch to a .csv
file.
+
method save_data(filename="auto", dataset="dataset", **kwargs)
[source] Save the data in the current branch to a .csv
file.
Parameters | filename: str, default="auto"
Name of the file. Use "auto" for automatic naming.
@@ -4165,7 +4164,7 @@ Utility methods
|
-
method shrink(int2bool=False, int2uint=False, str2cat=False, dense2sparse=False, columns=None)
[source] Converts the columns to the smallest possible matching dtype.
+
method shrink(int2bool=False, int2uint=False, str2cat=False, dense2sparse=False, columns=None)
[source] Converts the columns to the smallest possible matching dtype.
Parameters | int2bool: bool, default=False
Whether to convert int columns to bool type. Only if the
@@ -4186,7 +4185,7 @@ Utility methods
|
-
method stacking(models=None, name="Stack", **kwargs)
[source] Add a Stacking model to the pipeline.
+
method stacking(models=None, name="Stack", **kwargs)
[source] Add a Stacking model to the pipeline.
Warning
Combining models trained on different branches into one
@@ -4206,18 +4205,18 @@
Utility methods
-
Display basic information about the dataset.
+
Display basic information about the dataset.
Parameters | _vb: int, default=-2
Internal parameter to always print if called by user.
|
-
Get an overview of the branches and models.
+
Get an overview of the branches and models.
This method prints the same information as the __repr__ and
also saves it to the logger.
-
method transform(X=None, y=None, verbose=None)
[source] Transform new data through the pipeline.
+
method transform(X=None, y=None, verbose=None)
[source] Transform new data through the pipeline.
Transformers that are only applied on the training set are
skipped. If only X
or only y
is provided, it ignores
transformers that require the other parameter. This can be
@@ -4247,7 +4246,7 @@
Utility methods
-
method voting(models=None, name="Vote", **kwargs)
[source] Add a Voting model to the pipeline.
+
method voting(models=None, name="Vote", **kwargs)
[source] Add a Voting model to the pipeline.
Warning
Combining models trained on different branches into one
@@ -4280,7 +4279,7 @@
Data cleaning
clean | Applies standard data cleaning steps on the dataset. |
discretize | Bin continuous data into intervals. |
encode | Perform encoding of categorical features. |
impute | Handle missing values in the dataset. |
normalize | Transform the data to follow a Normal/Gaussian distribution. |
prune | Prune outliers from the training set. |
scale | Scale the data. |
-
method clean(convert_dtypes=True, drop_dtypes=None, drop_chars=None, strip_categorical=True, drop_duplicates=False, drop_missing_target=True, encode_target=True, **kwargs)
[source] Applies standard data cleaning steps on the dataset.
+
method clean(convert_dtypes=True, drop_dtypes=None, drop_chars=None, strip_categorical=True, drop_duplicates=False, drop_missing_target=True, encode_target=True, **kwargs)
[source] Applies standard data cleaning steps on the dataset.
Use the parameters to choose which transformations to perform.
The available steps are:
@@ -4294,7 +4293,7 @@ Data cleaning
See the Cleaner class for a description of the parameters.
-
method discretize(strategy="quantile", bins=5, labels=None, **kwargs)
[source] Bin continuous data into intervals.
+
method discretize(strategy="quantile", bins=5, labels=None, **kwargs)
[source] Bin continuous data into intervals.
For each feature, the bin edges are computed during fit
and, together with the number of bins, they will define the
intervals. Ignores categorical columns.
@@ -4305,7 +4304,7 @@
Data cleaning
distribution and decide on the bins.
-
method encode(strategy="Target", max_onehot=10, ordinal=None, infrequent_to_value=None, value="rare", **kwargs)
[source] Perform encoding of categorical features.
+
method encode(strategy="Target", max_onehot=10, ordinal=None, infrequent_to_value=None, value="rare", **kwargs)
[source] Perform encoding of categorical features.
The encoding type depends on the number of classes in the
column:
@@ -4330,7 +4329,7 @@ Data cleaning
list of the categorical features in the dataset.
-
method impute(strat_num="drop", strat_cat="drop", max_nan_rows=None, max_nan_cols=None, **kwargs)
[source] Handle missing values in the dataset.
+
method impute(strat_num="drop", strat_cat="drop", max_nan_rows=None, max_nan_cols=None, **kwargs)
[source] Handle missing values in the dataset.
Impute or remove missing values according to the selected
strategy. Also removes rows and columns with too many missing
values. Use the missing
attribute to customize what are
@@ -4342,7 +4341,7 @@
Data cleaning
missing values per column.
-
method normalize(strategy="yeojohnson", **kwargs)
[source] Transform the data to follow a Normal/Gaussian distribution.
+
method normalize(strategy="yeojohnson", **kwargs)
[source] Transform the data to follow a Normal/Gaussian distribution.
This transformation is useful for modeling issues related
to heteroscedasticity (non-constant variance), or other
situations where normality is desired. Missing values are
@@ -4355,7 +4354,7 @@
Data cleaning
distribution.
-
method prune(strategy="zscore", method="drop", max_sigma=3, include_target=False, **kwargs)
[source] Prune outliers from the training set.
+
method prune(strategy="zscore", method="drop", max_sigma=3, include_target=False, **kwargs)
[source] Prune outliers from the training set.
Replace or remove outliers. The definition of outlier depends
on the selected strategy and can greatly differ from one
another. Ignores categorical columns.
@@ -4372,7 +4371,7 @@ Data cleaning
number of outliers per column.
-
method scale(strategy="standard", include_binary=False, **kwargs)
[source] Scale the data.
+
method scale(strategy="standard", include_binary=False, **kwargs)
[source] Scale the data.
Apply one of sklearn's scalers. Categorical columns are ignored.
See the Scaler class for a description of the parameters.
@@ -4391,7 +4390,7 @@
NLP
textclean | Applies standard text cleaning to the corpus. |
textnormalize | Normalize the corpus. |
tokenize | Tokenize the corpus. |
vectorize | Vectorize the corpus. |
-
method textclean(decode=True, lower_case=True, drop_email=True, regex_email=None, drop_url=True, regex_url=None, drop_html=True, regex_html=None, drop_emoji=True, regex_emoji=None, drop_number=True, regex_number=None, drop_punctuation=True, **kwargs)
[source] Applies standard text cleaning to the corpus.
+
method textclean(decode=True, lower_case=True, drop_email=True, regex_email=None, drop_url=True, regex_url=None, drop_html=True, regex_html=None, drop_emoji=True, regex_emoji=None, drop_number=True, regex_number=None, drop_punctuation=True, **kwargs)
[source] Applies standard text cleaning to the corpus.
Transformations include normalizing characters and dropping
noise from the text (emails, HTML tags, URLs, etc...). The
transformations are applied on the column named corpus
, in
@@ -4400,7 +4399,7 @@
NLP
See the TextCleaner class for a description of the
parameters.
-
method textnormalize(stopwords=True, custom_stopwords=None, stem=False, lemmatize=True, **kwargs)
[source] Normalize the corpus.
+
method textnormalize(stopwords=True, custom_stopwords=None, stem=False, lemmatize=True, **kwargs)
[source] Normalize the corpus.
Convert words to a more uniform standard. The transformations
are applied on the column named corpus
, in the same order the
parameters are presented. If there is no column with that name,
@@ -4409,7 +4408,7 @@
NLP
See the TextNormalizer class for a description of the
parameters.
-
method tokenize(bigram_freq=None, trigram_freq=None, quadgram_freq=None, **kwargs)
[source] Tokenize the corpus.
+
method tokenize(bigram_freq=None, trigram_freq=None, quadgram_freq=None, **kwargs)
[source] Tokenize the corpus.
Convert documents into sequences of words. Additionally,
create n-grams (represented by words united with underscores,
e.g. "New_York") based on their frequency in the corpus. The
@@ -4417,7 +4416,7 @@
NLP
there is no column with that name, an exception is raised.
See the Tokenizer class for a description of the parameters.
-
method vectorize(strategy="bow", return_sparse=True, **kwargs)
[source] Vectorize the corpus.
+
method vectorize(strategy="bow", return_sparse=True, **kwargs)
[source] Vectorize the corpus.
Transform the corpus into meaningful vectors of numbers. The
transformation is applied on the column named corpus
. If
there is no column with that name, an exception is raised.
@@ -4438,7 +4437,7 @@
Feature engineering
feature_extraction | Extract features from datetime columns. |
feature_generation | Generate new features. |
feature_grouping | Extract statistics from similar features. |
feature_selection | Reduce the number of features in the data. |
-
method feature_extraction(features=['day', 'month', 'year'], fmt=None, encoding_type="ordinal", drop_columns=True, **kwargs)
[source] Extract features from datetime columns.
+
method feature_extraction(features=['day', 'month', 'year'], fmt=None, encoding_type="ordinal", drop_columns=True, **kwargs)
[source] Extract features from datetime columns.
Create new features extracting datetime elements (day, month,
year, etc...) from the provided columns. Columns of dtype
datetime64
are used as is. Categorical columns that can be
@@ -4447,13 +4446,13 @@
Feature engineering
See the FeatureExtractor class for a description of the
parameters.
-
method feature_generation(strategy="dfs", n_features=None, operators=None, **kwargs)
[source] Generate new features.
+
method feature_generation(strategy="dfs", n_features=None, operators=None, **kwargs)
[source] Generate new features.
Create new combinations of existing features to capture the
non-linear relations between the original features.
See the FeatureGenerator class for a description of the
parameters.
-
method feature_grouping(group, operators=None, drop_columns=True, **kwargs)
[source] Extract statistics from similar features.
+
method feature_grouping(group, operators=None, drop_columns=True, **kwargs)
[source] Extract statistics from similar features.
Replace groups of features with related characteristics with new
features that summarize statistical properties of the group. The
statistical operators are calculated over every row of the group.
@@ -4462,7 +4461,7 @@
Feature engineering
See the FeatureGrouper class for a description of the
parameters.
-
method feature_selection(strategy=None, solver=None, n_features=None, min_repeated=2, max_repeated=1.0, max_correlation=1.0, **kwargs)
[source] Reduce the number of features in the data.
+
method feature_selection(strategy=None, solver=None, n_features=None, min_repeated=2, max_repeated=1.0, max_correlation=1.0, **kwargs)
[source] Reduce the number of features in the data.
Apply feature selection or dimensionality reduction, either to
improve the estimators' accuracy or to boost their performance
on very high-dimensional datasets. Additionally, remove
@@ -4490,7 +4489,7 @@
Training
run | Train and evaluate the models in a direct fashion. |
successive_halving | Fit the models in a successive halving fashion. |
train_sizing | Train and evaluate the models in a train sizing fashion. |
-
method run(models=None, metric=None, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Train and evaluate the models in a direct fashion.
+
method run(models=None, metric=None, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Train and evaluate the models in a direct fashion.
Contrary to successive_halving and
train_sizing, the direct approach only
iterates once over the models, using the full dataset.
@@ -4507,7 +4506,7 @@
Training
See the DirectClassifier or DirectRegressor class for a
description of the parameters.
-
method successive_halving(models, metric=None, skip_runs=0, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Fit the models in a successive halving fashion.
+
method successive_halving(models, metric=None, skip_runs=0, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Fit the models in a successive halving fashion.
The successive halving technique is a bandit-based algorithm
that fits N models to 1/N of the data. The best half are
selected to go to the next iteration where the process is
@@ -4530,7 +4529,7 @@
Training
See the SuccessiveHalvingClassifier or SuccessiveHalvingRegressor
class for a description of the parameters.
-
method train_sizing(models, metric=None, train_sizes=5, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Train and evaluate the models in a train sizing fashion.
+
method train_sizing(models, metric=None, train_sizes=5, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Train and evaluate the models in a train sizing fashion.
When training models, there is usually a trade-off between
model performance and computation time, that is regulated by
the number of samples in the training set. This method can be
diff --git a/docs/API/ATOM/atommodel/index.html b/docs/API/ATOM/atommodel/index.html
index ddd7a11d9..5e9cb891d 100644
--- a/docs/API/ATOM/atommodel/index.html
+++ b/docs/API/ATOM/atommodel/index.html
@@ -1205,7 +1205,7 @@
-
- DirectRegressor
+ DirectForecaster
@@ -1247,7 +1247,7 @@
-
- SuccessiveHalvingRegressor
+ SuccessiveHalvingForecaster
@@ -1289,7 +1289,7 @@
-
- TrainSizingRegressor
+ TrainSizingForecaster
@@ -3226,7 +3226,7 @@
ATOMModel
-
function atom.api.
ATOMModel(estimator, name=None, acronym=None, needs_scaling=False, native_multilabel=False, native_multioutput=False, has_validation=None)
[source] Convert an estimator to a model that can be ingested by atom.
+
function atom.api.
ATOMModel(estimator, name=None, acronym=None, needs_scaling=False, native_multilabel=False, native_multioutput=False, has_validation=None)
[source] Convert an estimator to a model that can be ingested by atom.
This function adds the relevant attributes to the estimator so
that they can be used by atom. Note that only estimators that follow
sklearn's API are compatible.
diff --git a/docs/API/ATOM/atomregressor/index.html b/docs/API/ATOM/atomregressor/index.html
index f6014ecf6..2abe31e14 100644
--- a/docs/API/ATOM/atomregressor/index.html
+++ b/docs/API/ATOM/atomregressor/index.html
@@ -1288,7 +1288,7 @@
-
- DirectRegressor
+ DirectForecaster
@@ -1330,7 +1330,7 @@
-
- SuccessiveHalvingRegressor
+ SuccessiveHalvingForecaster
@@ -1372,7 +1372,7 @@
-
- TrainSizingRegressor
+ TrainSizingForecaster
@@ -3392,7 +3392,7 @@
ATOMRegressor
-
class atom.api.
ATOMRegressor(*arrays, y=-1, index=False, shuffle=True, n_rows=1, test_size=0.2, holdout_size=None, n_jobs=1, device="cpu", engine=None, backend="loky", verbose=0, warnings=False, logger=None, experiment=None, random_state=None)
[source] Main class for regression tasks.
+
class atom.api.
ATOMRegressor(*arrays, y=-1, index=False, shuffle=True, n_rows=1, test_size=0.2, holdout_size=None, n_jobs=1, device="cpu", engine={'data': 'numpy', 'estimator': 'sklearn'}, backend="loky", verbose=0, warnings=False, logger=None, experiment=None, random_state=None)
[source] Main class for regression tasks.
Apply all data transformations and model management provided by
the package on a given dataset. Note that, contrary to sklearn's
API, the instance contains the dataset on which to perform the
@@ -3484,17 +3484,16 @@
ATOMRegressor
follows the
SYCL_DEVICE_FILTER filter selector, e.g.
device="gpu"
to use the GPU. Read more in the
user guide.
-
engine: dict or None, default=None
engine: dict, default={"data": "numpy", "estimator": "sklearn"}
Execution engine to use for
data and
estimators. The value should be a
dictionary with keys
data
and/or
estimator
, with their
-corresponding choice as values. If None, the default options
-are selected. Choose from:
+corresponding choice as values. Choose from:
-
"data":
-- "numpy" (default)
+- "numpy"
- "pyarrow"
- "modin"
@@ -3502,7 +3501,7 @@ ATOMRegressor
-
"estimator":
-- "sklearn" (default)
+- "sklearn"
- "sklearnex"
- "cuml"
@@ -3599,15 +3598,15 @@ Data attributes
visualize the pipeline, use the plot_pipeline method.
mapping: dict
Encoded values and their respective mapped values.
The column name is the key to its mapping dictionary. Only for
columns mapped to a single column (e.g. Ordinal, Leave-one-out,
-etc...).
dataset: dataframe
Complete data set.
train: dataframe
Training set.
test: dataframe
Test set.
X: dataframe
Feature set.
y: series | dataframe
Target column(s).
X_train: dataframe
Features of the training set.
y_train: series | dataframe
Target column(s) of the training set.
X_test: dataframe
Features of the test set.
y_test: series | dataframe
Target column(s) of the test set.
shape: tuple[int, int]
Shape of the dataset (n_rows, n_columns).
columns: series
Name of all the columns.
n_columns: int
Number of columns.
features: series
Name of the features.
n_features: int
Number of features.
target: str | list[str]
Name of the target column(s).
scaled: bool
Whether the feature set is scaled.
+etc...).
dataset: dataframe
Complete data set.
train: dataframe
Training set.
test: dataframe
Test set.
X: dataframe
Feature set.
y: series | dataframe
Target column(s).
X_train: dataframe
Features of the training set.
y_train: series | dataframe
Target column(s) of the training set.
X_test: dataframe
Features of the test set.
y_test: series | dataframe
Target column(s) of the test set.
shape: tuple[int, int]
Shape of the dataset (n_rows, n_columns).
columns: index
Name of all the columns.
n_columns: int
Number of columns.
features: index
Name of the features.
n_features: int
Number of features.
target: str | list[str]
Name of the target column(s).
scaled: bool
Whether the feature set is scaled.
A data set is considered scaled when it has mean=0 and std=1,
or when there is a scaler in the pipeline. Binary columns (only
-0s and 1s) are excluded from the calculation.
duplicates: series
Number of duplicate rows in the dataset.
missing: list
Values that are considered "missing".
+0s and 1s) are excluded from the calculation.
duplicates: int
Number of duplicate rows in the dataset.
missing: list
Values that are considered "missing".
These values are used by the clean and
impute methods. Default values are: None, NaN,
NaT, +inf, -inf, "", "?", "None", "NA", "nan", "NaN", "NaT",
"inf". Note that None, NaN, +inf and -inf are always considered
-missing since they are incompatible with sklearn estimators.
nans: series | None
Columns with the number of missing values in them.
n_nans: int | None
Number of samples containing missing values.
numerical: series
Names of the numerical features in the dataset.
n_numerical: int
Number of numerical features in the dataset.
categorical: series
Names of the categorical features in the dataset.
n_categorical: int
Number of categorical features in the dataset.
outliers: series | None
Columns in training set with amount of outlier values.
n_outliers: int | None
Number of samples in the training set containing outliers.
+missing since they are incompatible with sklearn estimators.
nans: series | None
Columns with the number of missing values in them.
n_nans: int | None
Number of samples containing missing values.
numerical: index
Names of the numerical features in the dataset.
n_numerical: int
Number of numerical features in the dataset.
categorical: index
Names of the categorical features in the dataset.
n_categorical: int
Number of categorical features in the dataset.
outliers: pd.Series | None
Columns in training set with amount of outlier values.
n_outliers: int | None
Number of samples in the training set containing outliers.
Utility attributes
@@ -3657,7 +3656,7 @@ Plot attributes
The plot attributes are used to customize the plot's aesthetics. Read
more in the user guide.
-Attributes | palette: str | SEQUENCE
Color palette.
+ Attributes | palette: str | sequence
Color palette.
Specify one of plotly's built-in palettes or create
a custom one, e.g. atom.palette = ["red", "green", "blue"] . title_fontsize: int
Fontsize for the plot's title. label_fontsize: int
Fontsize for the labels, legend and hover information. tick_fontsize: int
Fontsize for the ticks along the plot's axes. line_width: int
Width of the line plots. marker_size: int
Size of the markers. |
|
@@ -3669,7 +3668,7 @@ Utility methods
add | Add a transformer to the pipeline. |
apply | Apply a function to the dataset. |
automl | Search for an optimized pipeline in an automated fashion. |
available_models | Give an overview of the available predefined models. |
canvas | Create a figure with multiple plots. |
clear | Reset attributes and clear cache from all models. |
delete | Delete models. |
distribution | Get statistics on column distributions. |
eda | Create an Exploratory Data Analysis report. |
evaluate | Get all models' scores for the provided metrics. |
export_pipeline | Export the pipeline to a sklearn-like object. |
get_class_weight | Return class weights for a balanced data set. |
get_sample_weight | Return sample weights for a balanced data set. |
inverse_transform | Inversely transform new data through the pipeline. |
load | Loads an atom instance from a pickle file. |
log | Print message and save to log file. |
merge | Merge another instance of the same class into this one. |
update_layout | Update the properties of the plot's layout. |
update_traces | Update the properties of the plot's traces. |
reset | Reset the instance to its initial state. |
reset_aesthetics | Reset the plot aesthetics to their default values. |
save | Save the instance to a pickle file. |
save_data | Save the data in the current branch to a .csv file. |
shrink | Converts the columns to the smallest possible matching dtype. |
stacking | Add a Stacking model to the pipeline. |
stats | Display basic information about the dataset. |
status | Get an overview of the branches and models. |
transform | Transform new data through the pipeline. |
voting | Add a Voting model to the pipeline. |
-
method add(transformer, columns=None, train_only=False, **fit_params)
[source] Add a transformer to the pipeline.
+
method add(transformer, columns=None, train_only=False, **fit_params)
[source] Add a transformer to the pipeline.
If the transformer is not fitted, it is fitted on the complete
training set. Afterwards, the data set is transformed and the
estimator is added to atom's pipeline. If the estimator is
@@ -3732,10 +3731,9 @@
Utility methods
-
method apply(func, inverse_func=None, kw_args=None, inv_kw_args=None, **kwargs)
[source] Apply a function to the dataset.
-The function should have signature func(dataset, **kw_args) ->
-dataset
. This method is useful for stateless transformations
-such as taking the log, doing custom scaling, etc...
+
method apply(func, inverse_func=None, kw_args=None, inv_kw_args=None, **kwargs)
[source] Apply a function to the dataset.
+This method is useful for stateless transformations such as
+taking the log, doing custom scaling, etc...
Note
This approach is preferred over changing the dataset directly
@@ -3748,7 +3746,8 @@
Utility methods
Parameters | func: callable
-Function to apply.
+Function to apply with signature func(dataset, **kw_args) ->
+dataset .
inverse_func: callable or None, default=None
Inverse function of func . If None, the inverse_transform
method returns the input unchanged.
@@ -3759,7 +3758,7 @@ Utility methods
|
-
Search for an optimized pipeline in an automated fashion.
+
Search for an optimized pipeline in an automated fashion.
Automated machine learning (AutoML) automates the selection,
composition and parameterization of machine learning pipelines.
Automating the machine learning often provides faster, more
@@ -3781,7 +3780,7 @@
Utility methods
-
Give an overview of the available predefined models.
+
Give an overview of the available predefined models.
Returns | pd.DataFrame
Information about the available predefined models. Columns
@@ -3803,7 +3802,7 @@ Utility methods
|
-
method canvas(rows=1, cols=2, horizontal_spacing=0.05, vertical_spacing=0.07, title=None, legend="out", figsize=None, filename=None, display=True)
[source] Create a figure with multiple plots.
+
method canvas(rows=1, cols=2, horizontal_spacing=0.05, vertical_spacing=0.07, title=None, legend="out", figsize=None, filename=None, display=True)
[source] Create a figure with multiple plots.
This @contextmanager
allows you to draw many plots in one
figure. The default option is to add two plots side by side.
See the user guide for an example.
@@ -3848,7 +3847,7 @@ Utility methods
-
Reset attributes and clear cache from all models.
+
Reset attributes and clear cache from all models.
Reset certain model attributes to their initial state, deleting
potentially large data arrays. Use this method to free some
memory before saving the instance. The affected
@@ -3863,7 +3862,7 @@
Utility methods
- Cached holdout data sets
-
Delete models.
+
Delete models.
If all models are removed, the metric is reset. Use this method
to drop unwanted models from the pipeline or to free some memory
before saving. Deleted models are not removed from
@@ -3874,7 +3873,7 @@
Utility methods
-
method distribution(distributions=None, columns=None)
[source] Get statistics on column distributions.
+
method distribution(distributions=None, columns=None)
[source] Get statistics on column distributions.
Compute the Kolmogorov-Smirnov test for various
distributions against columns in the dataset. Only for numerical
columns. Missing values are ignored.
@@ -3904,7 +3903,7 @@ Utility methods
-
method eda(dataset="dataset", n_rows=None, filename=None, **kwargs)
[source] Create an Exploratory Data Analysis report.
+
method eda(dataset="dataset", n_rows=None, filename=None, **kwargs)
[source] Create an Exploratory Data Analysis report.
ATOM uses the ydata-profiling package for the EDA.
The report is rendered directly in the notebook. The created
ProfileReport instance can be accessed through the report
@@ -3928,7 +3927,7 @@
Utility methods
-
method evaluate(metric=None, dataset="test", threshold=0.5, sample_weight=None)
[source] Get all models' scores for the provided metrics.
+
method evaluate(metric=None, dataset="test", threshold=0.5, sample_weight=None)
[source] Get all models' scores for the provided metrics.
Parameters | metric: str, func, scorer, sequence or None, default=None
Metric to calculate. If None, it returns an overview of
@@ -3955,7 +3954,7 @@ Utility methods
|
-
method export_pipeline(model=None, memory=None, verbose=None)
[source] Export the pipeline to a sklearn-like object.
+
method export_pipeline(model=None, memory=None, verbose=None)
[source] Export the pipeline to a sklearn-like object.
Optionally, you can add a model as final estimator. The
returned pipeline is already fitted on the training set.
@@ -3994,7 +3993,7 @@
Utility methods
-
method get_class_weight(dataset="train")
[source] Return class weights for a balanced data set.
+
method get_class_weight(dataset="train")
[source] Return class weights for a balanced data set.
Statistically, the class weights re-balance the data set so
that the sampled data set represents the target population
as closely as possible. The returned weights are inversely
@@ -4009,7 +4008,7 @@
Utility methods
-
method get_sample_weight(dataset="train")
[source] Return sample weights for a balanced data set.
+
method get_sample_weight(dataset="train")
[source] Return sample weights for a balanced data set.
The returned weights are inversely proportional to the class
frequencies in the selected data set. For multioutput tasks,
the weights of each column of y
will be multiplied.
@@ -4022,7 +4021,7 @@ Utility methods
-
method inverse_transform(X=None, y=None, verbose=None)
[source] Inversely transform new data through the pipeline.
+
method inverse_transform(X=None, y=None, verbose=None)
[source] Inversely transform new data through the pipeline.
Transformers that are only applied on the training set are
skipped. The rest should all implement a inverse_transform
method. If only X
or only y
is provided, it ignores
@@ -4052,7 +4051,7 @@
Utility methods
-
function atom.atom.
load(filename, data=None, transform_data=True, verbose=None)
[source] Loads an atom instance from a pickle file.
+
function atom.atom.
load(filename, data=None, transform_data=True, verbose=None)
[source] Loads an atom instance from a pickle file.
If the instance was saved using save_data=False
,
it's possible to load new data into it and apply all data
transformations.
@@ -4102,7 +4101,7 @@ Utility methods
-
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
+
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
Parameters | msg: int, float or str
Message to save to the logger and print to stdout.
@@ -4114,7 +4113,7 @@ Utility methods
|
-
Merge another instance of the same class into this one.
+
Merge another instance of the same class into this one.
Branches, models, metrics and attributes of the other instance
are merged into this one. If there are branches and/or models
with the same name, they are merged adding the suffix
@@ -4132,7 +4131,7 @@
Utility methods
-
Update the properties of the plot's layout.
+
Update the properties of the plot's layout.
Recursively update the structure of the original layout with
the values in the arguments.
@@ -4141,7 +4140,7 @@ Utility methods
-
Update the properties of the plot's traces.
+
Update the properties of the plot's traces.
Recursively update the structure of the original traces with
the values in the arguments.
@@ -4150,13 +4149,13 @@ Utility methods
-
Reset the instance to its initial state.
+
Reset the instance to its initial state.
Deletes all branches and models. The dataset is also reset
to its form after initialization.
-
Reset the plot aesthetics to their default values.
+
Reset the plot aesthetics to their default values.
-
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
+
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
Parameters | filename: str, default="auto"
Name of the file. Use "auto" for automatic naming.
@@ -4167,7 +4166,7 @@ Utility methods
|
-
method save_data(filename="auto", dataset="dataset", **kwargs)
[source] Save the data in the current branch to a .csv
file.
+
method save_data(filename="auto", dataset="dataset", **kwargs)
[source] Save the data in the current branch to a .csv
file.
Parameters | filename: str, default="auto"
Name of the file. Use "auto" for automatic naming.
@@ -4178,7 +4177,7 @@ Utility methods
|
-
method shrink(int2bool=False, int2uint=False, str2cat=False, dense2sparse=False, columns=None)
[source] Converts the columns to the smallest possible matching dtype.
+
method shrink(int2bool=False, int2uint=False, str2cat=False, dense2sparse=False, columns=None)
[source] Converts the columns to the smallest possible matching dtype.
Parameters | int2bool: bool, default=False
Whether to convert int columns to bool type. Only if the
@@ -4199,7 +4198,7 @@ Utility methods
|
-
method stacking(models=None, name="Stack", **kwargs)
[source] Add a Stacking model to the pipeline.
+
method stacking(models=None, name="Stack", **kwargs)
[source] Add a Stacking model to the pipeline.
Warning
Combining models trained on different branches into one
@@ -4219,18 +4218,18 @@
Utility methods
-
Display basic information about the dataset.
+
Display basic information about the dataset.
Parameters | _vb: int, default=-2
Internal parameter to always print if called by user.
|
-
Get an overview of the branches and models.
+
Get an overview of the branches and models.
This method prints the same information as the __repr__ and
also saves it to the logger.
-
method transform(X=None, y=None, verbose=None)
[source] Transform new data through the pipeline.
+
method transform(X=None, y=None, verbose=None)
[source] Transform new data through the pipeline.
Transformers that are only applied on the training set are
skipped. If only X
or only y
is provided, it ignores
transformers that require the other parameter. This can be
@@ -4260,7 +4259,7 @@
Utility methods
-
method voting(models=None, name="Vote", **kwargs)
[source] Add a Voting model to the pipeline.
+
method voting(models=None, name="Vote", **kwargs)
[source] Add a Voting model to the pipeline.
Warning
Combining models trained on different branches into one
@@ -4293,7 +4292,7 @@
Data cleaning
clean | Applies standard data cleaning steps on the dataset. |
discretize | Bin continuous data into intervals. |
encode | Perform encoding of categorical features. |
impute | Handle missing values in the dataset. |
normalize | Transform the data to follow a Normal/Gaussian distribution. |
prune | Prune outliers from the training set. |
scale | Scale the data. |
-
method clean(convert_dtypes=True, drop_dtypes=None, drop_chars=None, strip_categorical=True, drop_duplicates=False, drop_missing_target=True, encode_target=True, **kwargs)
[source] Applies standard data cleaning steps on the dataset.
+
method clean(convert_dtypes=True, drop_dtypes=None, drop_chars=None, strip_categorical=True, drop_duplicates=False, drop_missing_target=True, encode_target=True, **kwargs)
[source] Applies standard data cleaning steps on the dataset.
Use the parameters to choose which transformations to perform.
The available steps are:
@@ -4307,7 +4306,7 @@ Data cleaning
See the Cleaner class for a description of the parameters.
-
method discretize(strategy="quantile", bins=5, labels=None, **kwargs)
[source] Bin continuous data into intervals.
+
method discretize(strategy="quantile", bins=5, labels=None, **kwargs)
[source] Bin continuous data into intervals.
For each feature, the bin edges are computed during fit
and, together with the number of bins, they will define the
intervals. Ignores categorical columns.
@@ -4318,7 +4317,7 @@
Data cleaning
distribution and decide on the bins.
-
method encode(strategy="Target", max_onehot=10, ordinal=None, infrequent_to_value=None, value="rare", **kwargs)
[source] Perform encoding of categorical features.
+
method encode(strategy="Target", max_onehot=10, ordinal=None, infrequent_to_value=None, value="rare", **kwargs)
[source] Perform encoding of categorical features.
The encoding type depends on the number of classes in the
column:
@@ -4343,7 +4342,7 @@ Data cleaning
list of the categorical features in the dataset.
-
method impute(strat_num="drop", strat_cat="drop", max_nan_rows=None, max_nan_cols=None, **kwargs)
[source] Handle missing values in the dataset.
+
method impute(strat_num="drop", strat_cat="drop", max_nan_rows=None, max_nan_cols=None, **kwargs)
[source] Handle missing values in the dataset.
Impute or remove missing values according to the selected
strategy. Also removes rows and columns with too many missing
values. Use the missing
attribute to customize what are
@@ -4355,7 +4354,7 @@
Data cleaning
missing values per column.
-
method normalize(strategy="yeojohnson", **kwargs)
[source] Transform the data to follow a Normal/Gaussian distribution.
+
method normalize(strategy="yeojohnson", **kwargs)
[source] Transform the data to follow a Normal/Gaussian distribution.
This transformation is useful for modeling issues related
to heteroscedasticity (non-constant variance), or other
situations where normality is desired. Missing values are
@@ -4368,7 +4367,7 @@
Data cleaning
distribution.
-
method prune(strategy="zscore", method="drop", max_sigma=3, include_target=False, **kwargs)
[source] Prune outliers from the training set.
+
method prune(strategy="zscore", method="drop", max_sigma=3, include_target=False, **kwargs)
[source] Prune outliers from the training set.
Replace or remove outliers. The definition of outlier depends
on the selected strategy and can greatly differ from one
another. Ignores categorical columns.
@@ -4385,7 +4384,7 @@ Data cleaning
number of outliers per column.
-
method scale(strategy="standard", include_binary=False, **kwargs)
[source] Scale the data.
+
method scale(strategy="standard", include_binary=False, **kwargs)
[source] Scale the data.
Apply one of sklearn's scalers. Categorical columns are ignored.
See the Scaler class for a description of the parameters.
@@ -4404,7 +4403,7 @@
NLP
textclean | Applies standard text cleaning to the corpus. |
textnormalize | Normalize the corpus. |
tokenize | Tokenize the corpus. |
vectorize | Vectorize the corpus. |
-
method textclean(decode=True, lower_case=True, drop_email=True, regex_email=None, drop_url=True, regex_url=None, drop_html=True, regex_html=None, drop_emoji=True, regex_emoji=None, drop_number=True, regex_number=None, drop_punctuation=True, **kwargs)
[source] Applies standard text cleaning to the corpus.
+
method textclean(decode=True, lower_case=True, drop_email=True, regex_email=None, drop_url=True, regex_url=None, drop_html=True, regex_html=None, drop_emoji=True, regex_emoji=None, drop_number=True, regex_number=None, drop_punctuation=True, **kwargs)
[source] Applies standard text cleaning to the corpus.
Transformations include normalizing characters and dropping
noise from the text (emails, HTML tags, URLs, etc...). The
transformations are applied on the column named corpus
, in
@@ -4413,7 +4412,7 @@
NLP
See the TextCleaner class for a description of the
parameters.
-
method textnormalize(stopwords=True, custom_stopwords=None, stem=False, lemmatize=True, **kwargs)
[source] Normalize the corpus.
+
method textnormalize(stopwords=True, custom_stopwords=None, stem=False, lemmatize=True, **kwargs)
[source] Normalize the corpus.
Convert words to a more uniform standard. The transformations
are applied on the column named corpus
, in the same order the
parameters are presented. If there is no column with that name,
@@ -4422,7 +4421,7 @@
NLP
See the TextNormalizer class for a description of the
parameters.
-
method tokenize(bigram_freq=None, trigram_freq=None, quadgram_freq=None, **kwargs)
[source] Tokenize the corpus.
+
method tokenize(bigram_freq=None, trigram_freq=None, quadgram_freq=None, **kwargs)
[source] Tokenize the corpus.
Convert documents into sequences of words. Additionally,
create n-grams (represented by words united with underscores,
e.g. "New_York") based on their frequency in the corpus. The
@@ -4430,7 +4429,7 @@
NLP
there is no column with that name, an exception is raised.
See the Tokenizer class for a description of the parameters.
-
method vectorize(strategy="bow", return_sparse=True, **kwargs)
[source] Vectorize the corpus.
+
method vectorize(strategy="bow", return_sparse=True, **kwargs)
[source] Vectorize the corpus.
Transform the corpus into meaningful vectors of numbers. The
transformation is applied on the column named corpus
. If
there is no column with that name, an exception is raised.
@@ -4451,7 +4450,7 @@
Feature engineering
feature_extraction | Extract features from datetime columns. |
feature_generation | Generate new features. |
feature_grouping | Extract statistics from similar features. |
feature_selection | Reduce the number of features in the data. |
-
method feature_extraction(features=['day', 'month', 'year'], fmt=None, encoding_type="ordinal", drop_columns=True, **kwargs)
[source] Extract features from datetime columns.
+
method feature_extraction(features=['day', 'month', 'year'], fmt=None, encoding_type="ordinal", drop_columns=True, **kwargs)
[source] Extract features from datetime columns.
Create new features extracting datetime elements (day, month,
year, etc...) from the provided columns. Columns of dtype
datetime64
are used as is. Categorical columns that can be
@@ -4460,13 +4459,13 @@
Feature engineering
See the FeatureExtractor class for a description of the
parameters.
-
method feature_generation(strategy="dfs", n_features=None, operators=None, **kwargs)
[source] Generate new features.
+
method feature_generation(strategy="dfs", n_features=None, operators=None, **kwargs)
[source] Generate new features.
Create new combinations of existing features to capture the
non-linear relations between the original features.
See the FeatureGenerator class for a description of the
parameters.
-
method feature_grouping(group, operators=None, drop_columns=True, **kwargs)
[source] Extract statistics from similar features.
+
method feature_grouping(group, operators=None, drop_columns=True, **kwargs)
[source] Extract statistics from similar features.
Replace groups of features with related characteristics with new
features that summarize statistical properties of the group. The
statistical operators are calculated over every row of the group.
@@ -4475,7 +4474,7 @@
Feature engineering
See the FeatureGrouper class for a description of the
parameters.
-
method feature_selection(strategy=None, solver=None, n_features=None, min_repeated=2, max_repeated=1.0, max_correlation=1.0, **kwargs)
[source] Reduce the number of features in the data.
+
method feature_selection(strategy=None, solver=None, n_features=None, min_repeated=2, max_repeated=1.0, max_correlation=1.0, **kwargs)
[source] Reduce the number of features in the data.
Apply feature selection or dimensionality reduction, either to
improve the estimators' accuracy or to boost their performance
on very high-dimensional datasets. Additionally, remove
@@ -4503,7 +4502,7 @@
Training
run | Train and evaluate the models in a direct fashion. |
successive_halving | Fit the models in a successive halving fashion. |
train_sizing | Train and evaluate the models in a train sizing fashion. |
-
method run(models=None, metric=None, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Train and evaluate the models in a direct fashion.
+
method run(models=None, metric=None, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Train and evaluate the models in a direct fashion.
Contrary to successive_halving and
train_sizing, the direct approach only
iterates once over the models, using the full dataset.
@@ -4520,7 +4519,7 @@
Training
See the DirectClassifier or DirectRegressor class for a
description of the parameters.
-
method successive_halving(models, metric=None, skip_runs=0, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Fit the models in a successive halving fashion.
+
method successive_halving(models, metric=None, skip_runs=0, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Fit the models in a successive halving fashion.
The successive halving technique is a bandit-based algorithm
that fits N models to 1/N of the data. The best half are
selected to go to the next iteration where the process is
@@ -4543,7 +4542,7 @@
Training
See the SuccessiveHalvingClassifier or SuccessiveHalvingRegressor
class for a description of the parameters.
-
method train_sizing(models, metric=None, train_sizes=5, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Train and evaluate the models in a train sizing fashion.
+
method train_sizing(models, metric=None, train_sizes=5, est_params=None, n_trials=0, ht_params=None, n_bootstrap=0, parallel=False, errors="skip", **kwargs)
[source] Train and evaluate the models in a train sizing fashion.
When training models, there is usually a trade-off between
model performance and computation time, that is regulated by
the number of samples in the training set. This method can be
diff --git a/docs/API/data_cleaning/balancer/index.html b/docs/API/data_cleaning/balancer/index.html
index 88faab6eb..aee8132eb 100644
--- a/docs/API/data_cleaning/balancer/index.html
+++ b/docs/API/data_cleaning/balancer/index.html
@@ -1212,7 +1212,7 @@
-
- DirectRegressor
+ DirectForecaster
@@ -1254,7 +1254,7 @@
-
- SuccessiveHalvingRegressor
+ SuccessiveHalvingForecaster
@@ -1296,7 +1296,7 @@
-
- TrainSizingRegressor
+ TrainSizingForecaster
@@ -3240,7 +3240,7 @@
Balancer
-
class atom.data_cleaning.
Balancer(strategy="ADASYN", n_jobs=1, verbose=0, logger=None, random_state=None, **kwargs)
[source] Balance the number of samples per class in the target column.
+
class atom.data_cleaning.
Balancer(strategy="ADASYN", n_jobs=1, verbose=0, logger=None, random_state=None, **kwargs)
[source] Balance the number of samples per class in the target column.
When oversampling, the newly created samples have an increasing
integer index for numerical indices, and an index of the form
[estimator]_N for non-numerical indices, where N stands for the
@@ -3339,7 +3339,7 @@
Methods
fit | Does nothing. |
fit_transform | Fit to data, then transform it. |
get_metadata_routing | Get metadata routing of this object. |
get_params | Get parameters for this estimator. |
inverse_transform | Does nothing. |
log | Print message and save to log file. |
save | Save the instance to a pickle file. |
set_params | Set the parameters of this estimator. |
transform | Balance the data. |
-
method fit(X=None, y=None, **fit_params)
[source] Does nothing.
+
method fit(X=None, y=None, **fit_params)
[source] Does nothing.
Implemented for continuity of the API.
Parameters | X: dataframe-like or None, default=None
@@ -3364,7 +3364,7 @@ Methods
|
-
method fit_transform(X=None, y=None, **fit_params)
[source] Fit to data, then transform it.
+
method fit_transform(X=None, y=None, **fit_params)
[source] Fit to data, then transform it.
Parameters | X: dataframe-like or None, default=None
Feature set with shape=(n_samples, n_features). If None,
@@ -3408,7 +3408,7 @@ Methods
|
-
method inverse_transform(X=None, y=None)
[source] Does nothing.
+
method inverse_transform(X=None, y=None)
[source] Does nothing.
Parameters | X: dataframe-like or None, default=None
Feature set with shape=(n_samples, n_features). If None,
@@ -3432,7 +3432,7 @@ Methods
|
-
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
+
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
Parameters | msg: int, float or str
Message to save to the logger and print to stdout.
@@ -3444,7 +3444,7 @@ Methods
|
-
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
+
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
Parameters | filename: str, default="auto"
Name of the file. Use "auto" for automatic naming.
@@ -3464,7 +3464,7 @@ Methods
|
-
Balance the data.
+
Balance the data.
Parameters | X: dataframe-like
Feature set with shape=(n_samples, n_features).
diff --git a/docs/API/data_cleaning/cleaner/index.html b/docs/API/data_cleaning/cleaner/index.html
index aa370330c..b3bcbcb13 100644
--- a/docs/API/data_cleaning/cleaner/index.html
+++ b/docs/API/data_cleaning/cleaner/index.html
@@ -1212,7 +1212,7 @@
-
- DirectRegressor
+ DirectForecaster
@@ -1254,7 +1254,7 @@
-
- SuccessiveHalvingRegressor
+ SuccessiveHalvingForecaster
@@ -1296,7 +1296,7 @@
-
- TrainSizingRegressor
+ TrainSizingForecaster
@@ -3240,7 +3240,7 @@
Cleaner
-
class atom.data_cleaning. Cleaner(convert_dtypes=True, drop_dtypes=None, drop_chars=None, strip_categorical=True, drop_duplicates=False, drop_missing_target=True, encode_target=True, device="cpu", engine=None, verbose=0, logger=None) [source] Applies standard data cleaning steps on a dataset.
+
class atom.data_cleaning. Cleaner(convert_dtypes=True, drop_dtypes=None, drop_chars=None, strip_categorical=True, drop_duplicates=False, drop_missing_target=True, encode_target=True, device="cpu", engine={'data': 'numpy', 'estimator': 'sklearn'}, verbose=0, logger=None) [source] Applies standard data cleaning steps on a dataset.
Use the parameters to choose which transformations to perform.
The available steps are:
@@ -3280,17 +3280,16 @@ Cleaner
follows the SYCL_DEVICE_FILTER filter selector, e.g.
device="gpu" to use the GPU. Read more in the
user guide.
- engine: dict or None, default=None engine: dict, default={"data": "numpy", "estimator": "sklearn"}
Execution engine to use for data and
estimators. The value should be a
dictionary with keys data and/or estimator , with their
-corresponding choice as values. If None, the default options
-are selected. Choose from:
+corresponding choice as values. Choose from:
-
"data":
-- "numpy" (default)
+- "numpy"
- "pyarrow"
- "modin"
@@ -3298,7 +3297,7 @@ Cleaner
-
"estimator":
-- "sklearn" (default)
+- "sklearn"
- "cuml"
@@ -3378,7 +3377,7 @@ Methods
fit | Fit to data. | fit_transform | Fit to data, then transform it. | get_metadata_routing | Get metadata routing of this object. | get_params | Get parameters for this estimator. | inverse_transform | Inversely transform the label encoding. | log | Print message and save to log file. | save | Save the instance to a pickle file. | set_params | Set the parameters of this estimator. | transform | Apply the data cleaning steps to the data. |
|
-
Fit to data.
+
Fit to data.
Parameters | X: dataframe-like or None, default=None
Feature set with shape=(n_samples, n_features). If None,
@@ -3399,7 +3398,7 @@ Methods
|
-
method fit_transform(X=None, y=None, **fit_params)
[source] Fit to data, then transform it.
+
method fit_transform(X=None, y=None, **fit_params)
[source] Fit to data, then transform it.
Parameters | X: dataframe-like or None, default=None
Feature set with shape=(n_samples, n_features). If None,
@@ -3443,7 +3442,7 @@ Methods
|
-
method inverse_transform(X=None, y=None)
[source] Inversely transform the label encoding.
+
method inverse_transform(X=None, y=None)
[source] Inversely transform the label encoding.
This method only inversely transforms the target encoding.
The rest of the transformations can't be inverted. If
encode_target=False
, the data is returned as is.
@@ -3468,7 +3467,7 @@
Methods
-
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
+
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
Parameters | msg: int, float or str
Message to save to the logger and print to stdout.
@@ -3480,7 +3479,7 @@ Methods
|
-
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
+
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
Parameters | filename: str, default="auto"
Name of the file. Use "auto" for automatic naming.
@@ -3500,7 +3499,7 @@ Methods
|
-
method transform(X=None, y=None)
[source] Apply the data cleaning steps to the data.
+
method transform(X=None, y=None)
[source] Apply the data cleaning steps to the data.
Parameters | X: dataframe-like or None, default=None
Feature set with shape=(n_samples, n_features). If None,
diff --git a/docs/API/data_cleaning/discretizer/index.html b/docs/API/data_cleaning/discretizer/index.html
index 2e86a1f49..ab1d8616c 100644
--- a/docs/API/data_cleaning/discretizer/index.html
+++ b/docs/API/data_cleaning/discretizer/index.html
@@ -1212,7 +1212,7 @@
-
- DirectRegressor
+ DirectForecaster
@@ -1254,7 +1254,7 @@
-
- SuccessiveHalvingRegressor
+ SuccessiveHalvingForecaster
@@ -1296,7 +1296,7 @@
-
- TrainSizingRegressor
+ TrainSizingForecaster
@@ -3240,7 +3240,7 @@
Discretizer
-
class atom.data_cleaning. Discretizer(strategy="quantile", bins=5, labels=None, device="cpu", engine=None, verbose=0, logger=None, random_state=None) [source] Bin continuous data into intervals.
+
class atom.data_cleaning. Discretizer(strategy="quantile", bins=5, labels=None, device="cpu", engine={'data': 'numpy', 'estimator': 'sklearn'}, verbose=0, logger=None, random_state=None) [source] Bin continuous data into intervals.
For each feature, the bin edges are computed during fit and,
together with the number of bins, they define the intervals.
Ignores categorical columns.
@@ -3290,17 +3290,16 @@ Discretizer
follows the SYCL_DEVICE_FILTER filter selector, e.g.
device="gpu" to use the GPU. Read more in the
user guide.
- engine: dict or None, default=None engine: dict, default={"data": "numpy", "estimator": "sklearn"}
Execution engine to use for data and
estimators. The value should be a
dictionary with keys data and/or estimator , with their
-corresponding choice as values. If None, the default options
-are selected. Choose from:
+corresponding choice as values. Choose from:
-
"data":
-- "numpy" (default)
+- "numpy"
- "pyarrow"
- "modin"
@@ -3308,7 +3307,7 @@ Discretizer
-
"estimator":
-- "sklearn" (default)
+- "sklearn"
- "cuml"
@@ -3393,7 +3392,7 @@ Methods
fit | Fit to data. | fit_transform | Fit to data, then transform it. | get_metadata_routing | Get metadata routing of this object. | get_params | Get parameters for this estimator. | inverse_transform | Does nothing. | log | Print message and save to log file. | save | Save the instance to a pickle file. | set_params | Set the parameters of this estimator. | transform | Bin the data into intervals. |
|
-
Fit to data.
+
Fit to data.
Parameters | X: dataframe-like
Feature set with shape=(n_samples, n_features).
@@ -3404,7 +3403,7 @@ Methods
|
-
method fit_transform(X=None, y=None, **fit_params)
[source] Fit to data, then transform it.
+
method fit_transform(X=None, y=None, **fit_params)
[source] Fit to data, then transform it.
Parameters | X: dataframe-like or None, default=None
Feature set with shape=(n_samples, n_features). If None,
@@ -3448,7 +3447,7 @@ Methods
|
-
method inverse_transform(X=None, y=None)
[source] Does nothing.
+
method inverse_transform(X=None, y=None)
[source] Does nothing.
Parameters | X: dataframe-like or None, default=None
Feature set with shape=(n_samples, n_features). If None,
@@ -3472,7 +3471,7 @@ Methods
|
-
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
+
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
Parameters | msg: int, float or str
Message to save to the logger and print to stdout.
@@ -3484,7 +3483,7 @@ Methods
|
-
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
+
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
Parameters | filename: str, default="auto"
Name of the file. Use "auto" for automatic naming.
@@ -3504,7 +3503,7 @@ Methods
|
-
Bin the data into intervals.
+
Bin the data into intervals.
Parameters | X: dataframe-like
Feature set with shape=(n_samples, n_features).
diff --git a/docs/API/data_cleaning/encoder/index.html b/docs/API/data_cleaning/encoder/index.html
index 362ddcf70..4de839743 100644
--- a/docs/API/data_cleaning/encoder/index.html
+++ b/docs/API/data_cleaning/encoder/index.html
@@ -1212,7 +1212,7 @@
-
- DirectRegressor
+ DirectForecaster
@@ -1254,7 +1254,7 @@
-
- SuccessiveHalvingRegressor
+ SuccessiveHalvingForecaster
@@ -1296,7 +1296,7 @@
-
- TrainSizingRegressor
+ TrainSizingForecaster
@@ -3240,7 +3240,7 @@
Encoder
-
class atom.data_cleaning. Encoder(strategy="Target", max_onehot=10, ordinal=None, infrequent_to_value=None, value="infrequent", verbose=0, logger=None, **kwargs) [source] Perform encoding of categorical features.
+
class atom.data_cleaning. Encoder(strategy="Target", max_onehot=10, ordinal=None, infrequent_to_value=None, value="infrequent", verbose=0, logger=None, **kwargs) [source] Perform encoding of categorical features.
The encoding type depends on the number of classes in the column:
|
-
Fit to data.
+
Fit to data.
Note that leaving y=None can lead to errors if the strategy
encoder requires target values. For multioutput tasks, only
the first target column is used to fit the encoder.
@@ -3390,7 +3390,7 @@ Methods
-
method fit_transform(X=None, y=None, **fit_params)
[source] Fit to data, then transform it.
+
method fit_transform(X=None, y=None, **fit_params)
[source] Fit to data, then transform it.
Parameters | X: dataframe-like or None, default=None
Feature set with shape=(n_samples, n_features). If None,
@@ -3434,7 +3434,7 @@ Methods
|
-
method inverse_transform(X=None, y=None)
[source] Does nothing.
+
method inverse_transform(X=None, y=None)
[source] Does nothing.
Parameters | X: dataframe-like or None, default=None
Feature set with shape=(n_samples, n_features). If None,
@@ -3458,7 +3458,7 @@ Methods
|
-
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
+
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
Parameters | msg: int, float or str
Message to save to the logger and print to stdout.
@@ -3470,7 +3470,7 @@ Methods
|
-
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
+
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
Parameters | filename: str, default="auto"
Name of the file. Use "auto" for automatic naming.
@@ -3490,7 +3490,7 @@ Methods
|
-
Encode the data.
+
Encode the data.
Parameters | X: dataframe-like
Feature set with shape=(n_samples, n_features).
diff --git a/docs/API/data_cleaning/imputer/index.html b/docs/API/data_cleaning/imputer/index.html
index 11e747b75..bf6ebe64b 100644
--- a/docs/API/data_cleaning/imputer/index.html
+++ b/docs/API/data_cleaning/imputer/index.html
@@ -1212,7 +1212,7 @@
-
- DirectRegressor
+ DirectForecaster
@@ -1254,7 +1254,7 @@
-
- SuccessiveHalvingRegressor
+ SuccessiveHalvingForecaster
@@ -1296,7 +1296,7 @@
-
- TrainSizingRegressor
+ TrainSizingForecaster
@@ -3240,7 +3240,7 @@
Imputer
-
class atom.data_cleaning. Imputer(strat_num="drop", strat_cat="drop", max_nan_rows=None, max_nan_cols=None, device="cpu", engine=None, verbose=0, logger=None) [source] Handle missing values in the data.
+
class atom.data_cleaning. Imputer(strat_num="drop", strat_cat="drop", max_nan_rows=None, max_nan_cols=None, device="cpu", engine={'data': 'numpy', 'estimator': 'sklearn'}, verbose=0, logger=None) [source] Handle missing values in the data.
Impute or remove missing values according to the selected strategy.
Also removes rows and columns with too many missing values. Use
the missing attribute to customize what are considered "missing
@@ -3275,17 +3275,16 @@ Imputer
follows the SYCL_DEVICE_FILTER filter selector, e.g.
device="gpu" to use the GPU. Read more in the
user guide.
- engine: dict or None, default=None engine: dict, default={"data": "numpy", "estimator": "sklearn"}
Execution engine to use for data and
estimators. The value should be a
dictionary with keys data and/or estimator , with their
-corresponding choice as values. If None, the default options
-are selected. Choose from:
+corresponding choice as values. Choose from:
-
"data":
-- "numpy" (default)
+- "numpy"
- "pyarrow"
- "modin"
@@ -3293,7 +3292,7 @@ Imputer
-
"estimator":
-- "sklearn" (default)
+- "sklearn"
- "cuml"
@@ -3379,7 +3378,7 @@ Methods
fit | Fit to data. | fit_transform | Fit to data, then transform it. | get_metadata_routing | Get metadata routing of this object. | get_params | Get parameters for this estimator. | inverse_transform | Does nothing. | log | Print message and save to log file. | save | Save the instance to a pickle file. | set_params | Set the parameters of this estimator. | transform | Impute the missing values. |
|
-
Fit to data.
+
Fit to data.
Parameters | X: dataframe-like
Feature set with shape=(n_samples, n_features).
@@ -3390,7 +3389,7 @@ Methods
|
-
method fit_transform(X=None, y=None, **fit_params)
[source] Fit to data, then transform it.
+
method fit_transform(X=None, y=None, **fit_params)
[source] Fit to data, then transform it.
Parameters | X: dataframe-like or None, default=None
Feature set with shape=(n_samples, n_features). If None,
@@ -3434,7 +3433,7 @@ Methods
|
-
method inverse_transform(X=None, y=None)
[source] Does nothing.
+
method inverse_transform(X=None, y=None)
[source] Does nothing.
Parameters | X: dataframe-like or None, default=None
Feature set with shape=(n_samples, n_features). If None,
@@ -3458,7 +3457,7 @@ Methods
|
-
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
+
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
Parameters | msg: int, float or str
Message to save to the logger and print to stdout.
@@ -3470,7 +3469,7 @@ Methods
|
-
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
+
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
Parameters | filename: str, default="auto"
Name of the file. Use "auto" for automatic naming.
@@ -3490,7 +3489,7 @@ Methods
|
-
Impute the missing values.
+
Impute the missing values.
Note that leaving y=None can lead to inconsistencies in
data length between X and y if rows are dropped during
the transformation.
diff --git a/docs/API/data_cleaning/normalizer/index.html b/docs/API/data_cleaning/normalizer/index.html
index ba0a4313d..880c285ea 100644
--- a/docs/API/data_cleaning/normalizer/index.html
+++ b/docs/API/data_cleaning/normalizer/index.html
@@ -1212,7 +1212,7 @@
-
- DirectRegressor
+ DirectForecaster
@@ -1254,7 +1254,7 @@
-
- SuccessiveHalvingRegressor
+ SuccessiveHalvingForecaster
@@ -1296,7 +1296,7 @@
-
- TrainSizingRegressor
+ TrainSizingForecaster
@@ -3240,7 +3240,7 @@
Normalizer
-
class atom.data_cleaning.
Normalizer(strategy="yeojohnson", device="cpu", engine=None, verbose=0, logger=None, random_state=None, **kwargs)
[source] Transform the data to follow a Normal/Gaussian distribution.
+
class atom.data_cleaning.
Normalizer(strategy="yeojohnson", device="cpu", engine={'data': 'numpy', 'estimator': 'sklearn'}, verbose=0, logger=None, random_state=None, **kwargs)
[source] Transform the data to follow a Normal/Gaussian distribution.
This transformation is useful for modeling issues related to
heteroscedasticity (non-constant variance), or other situations
where normality is desired. Missing values are disregarded in
@@ -3271,17 +3271,16 @@
Normalizer
follows the SYCL_DEVICE_FILTER filter selector, e.g.
device="gpu"
to use the GPU. Read more in the
user guide.
-engine: dict or None, default=None
engine: dict, default={"data": "numpy", "estimator": "sklearn"}
Execution engine to use for
data and
estimators. The value should be a
dictionary with keys
data
and/or
estimator
, with their
-corresponding choice as values. If None, the default options
-are selected. Choose from:
+corresponding choice as values. Choose from:
-
"data":
-- "numpy" (default)
+- "numpy"
- "pyarrow"
- "modin"
@@ -3289,7 +3288,7 @@ Normalizer
-
"estimator":
-- "sklearn" (default)
+- "sklearn"
- "cuml"
@@ -3368,7 +3367,7 @@ Methods
fit | Fit to data. |
fit_transform | Fit to data, then transform it. |
get_metadata_routing | Get metadata routing of this object. |
get_params | Get parameters for this estimator. |
inverse_transform | Apply the inverse transformation to the data. |
log | Print message and save to log file. |
save | Save the instance to a pickle file. |
set_params | Set the parameters of this estimator. |
transform | Apply the transformations to the data. |
-
Fit to data.
+
Fit to data.
Parameters | X: dataframe-like
Feature set with shape=(n_samples, n_features).
@@ -3379,7 +3378,7 @@ Methods
|
-
method fit_transform(X=None, y=None, **fit_params)
[source] Fit to data, then transform it.
+
method fit_transform(X=None, y=None, **fit_params)
[source] Fit to data, then transform it.
Parameters | X: dataframe-like or None, default=None
Feature set with shape=(n_samples, n_features). If None,
@@ -3423,7 +3422,7 @@ Methods
|
-
method inverse_transform(X, y=None)
[source] Apply the inverse transformation to the data.
+
method inverse_transform(X, y=None)
[source] Apply the inverse transformation to the data.
Parameters | X: dataframe-like
Feature set with shape=(n_samples, n_features).
@@ -3434,7 +3433,7 @@ Methods
|
-
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
+
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
Parameters | msg: int, float or str
Message to save to the logger and print to stdout.
@@ -3446,7 +3445,7 @@ Methods
|
-
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
+
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
Parameters | filename: str, default="auto"
Name of the file. Use "auto" for automatic naming.
@@ -3466,7 +3465,7 @@ Methods
|
-
Apply the transformations to the data.
+
Apply the transformations to the data.
Parameters | X: dataframe-like
Feature set with shape=(n_samples, n_features).
diff --git a/docs/API/data_cleaning/pruner/index.html b/docs/API/data_cleaning/pruner/index.html
index 3f2b974f5..60dd3337c 100644
--- a/docs/API/data_cleaning/pruner/index.html
+++ b/docs/API/data_cleaning/pruner/index.html
@@ -1212,7 +1212,7 @@
-
- DirectRegressor
+ DirectForecaster
@@ -1254,7 +1254,7 @@
-
- SuccessiveHalvingRegressor
+ SuccessiveHalvingForecaster
@@ -1296,7 +1296,7 @@
-
- TrainSizingRegressor
+ TrainSizingForecaster
@@ -3240,7 +3240,7 @@
Pruner
-
class atom.data_cleaning. Pruner(strategy="zscore", method="drop", max_sigma=3, include_target=False, device="cpu", engine=None, verbose=0, logger=None, **kwargs) [source] Prune outliers from the data.
+
class atom.data_cleaning. Pruner(strategy="zscore", method="drop", max_sigma=3, include_target=False, device="cpu", engine={'data': 'numpy', 'estimator': 'sklearn'}, verbose=0, logger=None, **kwargs) [source] Prune outliers from the data.
Replace or remove outliers. The definition of outlier depends
on the selected strategy and can greatly differ from one another.
Ignores categorical columns.
@@ -3286,17 +3286,16 @@ Pruner
follows the SYCL_DEVICE_FILTER filter selector, e.g.
device="gpu" to use the GPU. Read more in the
user guide.
- engine: dict or None, default=None engine: dict, default={"data": "numpy", "estimator": "sklearn"}
Execution engine to use for data and
estimators. The value should be a
dictionary with keys data and/or estimator , with their
-corresponding choice as values. If None, the default options
-are selected. Choose from:
+corresponding choice as values. Choose from:
-
"data":
-- "numpy" (default)
+- "numpy"
- "pyarrow"
- "modin"
@@ -3304,7 +3303,7 @@ Pruner
-
"estimator":
-- "sklearn" (default)
+- "sklearn"
- "sklearnex"
- "cuml"
@@ -3381,7 +3380,7 @@ Methods
fit | Does nothing. | fit_transform | Fit to data, then transform it. | get_metadata_routing | Get metadata routing of this object. | get_params | Get parameters for this estimator. | inverse_transform | Does nothing. | log | Print message and save to log file. | save | Save the instance to a pickle file. | set_params | Set the parameters of this estimator. | transform | Apply the outlier strategy on the data. |
|
-
method fit(X=None, y=None, **fit_params)
[source] Does nothing.
+
method fit(X=None, y=None, **fit_params)
[source] Does nothing.
Implemented for continuity of the API.
Parameters | X: dataframe-like or None, default=None
@@ -3406,7 +3405,7 @@ Methods
|
-
method fit_transform(X=None, y=None, **fit_params)
[source] Fit to data, then transform it.
+
method fit_transform(X=None, y=None, **fit_params)
[source] Fit to data, then transform it.
Parameters | X: dataframe-like or None, default=None
Feature set with shape=(n_samples, n_features). If None,
@@ -3450,7 +3449,7 @@ Methods
|
-
method inverse_transform(X=None, y=None)
[source] Does nothing.
+
method inverse_transform(X=None, y=None)
[source] Does nothing.
Parameters | X: dataframe-like or None, default=None
Feature set with shape=(n_samples, n_features). If None,
@@ -3474,7 +3473,7 @@ Methods
|
-
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
+
method log(msg, level=0, severity="info")
[source] Print message and save to log file.
Parameters | msg: int, float or str
Message to save to the logger and print to stdout.
@@ -3486,7 +3485,7 @@ Methods
|
-
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
+
method save(filename="auto", save_data=True)
[source] Save the instance to a pickle file.
Parameters | filename: str, default="auto"
Name of the file. Use "auto" for automatic naming.
@@ -3506,7 +3505,7 @@ Methods
|
-
Apply the outlier strategy on the data.
+
Apply the outlier strategy on the data.
Parameters | X: dataframe-like
Feature set with shape=(n_samples, n_features).
@@ -3688,4 +3687,4 @@ Methods
|